[VPlan] Move auxiliary declarations out of VPlan.h (NFC). (#124104)

Nothing in VPlan.h directly depends on VPTransformState, VPCostContext,
VFRange, VPlanPrinter or VPSlotTracker. Move them out to a separate
header to reduce the size of the widely used VPlan.h.

This is a first step towards more cleanly separating declarations in
VPlan.

Besides reducing VPlan.h's size, this also allows including additional
VPlan-related headers in VPlanHelpers.h for use there. An example is
using VPDominatorTree in VPTransformState
(https://github.com/llvm/llvm-project/pull/117138).
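For illustration only (this sketch is not part of the patch), a consumer that
only needs the moved helper types can now include the new header directly:

    // Hypothetical translation unit in lib/Transforms/Vectorize.
    #include "VPlanHelpers.h" // VFRange, VPLane, VPTransformState, VPSlotTracker, ...

    using namespace llvm;

    // Returns true while a vectorization-factor range still has candidates.
    static bool hasCandidateVFs(const VFRange &Range) { return !Range.isEmpty(); }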

PR: https://github.com/llvm/llvm-project/pull/124104
Florian Hahn 2025-02-02 13:44:07 +00:00 committed by GitHub
parent 642e84f001
commit 5008277322
13 changed files with 714 additions and 625 deletions


@@ -40,6 +40,7 @@ class OptimizationRemarkEmitter;
class TargetTransformInfo;
class TargetLibraryInfo;
class VPRecipeBuilder;
struct VFRange;
/// VPlan-based builder utility analogous to IRBuilder.
class VPBuilder {


@@ -59,6 +59,7 @@
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"


@@ -23,6 +23,7 @@ class LoopVectorizationCostModel;
class TargetLibraryInfo;
class TargetTransformInfo;
struct HistogramInfo;
struct VFRange;
/// A chain of instructions that form a partial reduction.
/// Designed to match: reduction_bin_op (bin_op (extend (A), (extend (B))),


@@ -19,6 +19,7 @@
#include "VPlan.h"
#include "LoopVectorizationPlanner.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
@@ -400,8 +401,8 @@ void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
set(Def, VectorValue);
}
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
auto &CFG = State.CFG;
// BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
// Pred stands for Predecessor. Prev stands for Previous - last visited/created.
BasicBlock *PrevBB = CFG.PrevBB;
@@ -412,7 +413,8 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
return NewBB;
}
void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
auto &CFG = State.CFG;
BasicBlock *NewBB = CFG.VPBB2IRBB[this];
// Hook up the new basic block to its predecessors.
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -467,7 +469,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
"other blocks must be terminated by a branch");
}
connectToPredecessors(State->CFG);
connectToPredecessors(*State);
}
VPIRBasicBlock *VPIRBasicBlock::clone() {
@@ -494,7 +496,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
// * the exit of a replicate region.
State->CFG.VPBB2IRBB[this] = NewBB;
} else {
NewBB = createEmptyBasicBlock(State->CFG);
NewBB = createEmptyBasicBlock(*State);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
@@ -514,7 +516,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
State->CFG.PrevBB = NewBB;
State->CFG.VPBB2IRBB[this] = NewBB;
connectToPredecessors(State->CFG);
connectToPredecessors(*State);
}
// 2. Fill the IR basic block with IR instructions.
@@ -623,6 +625,11 @@ bool VPBasicBlock::isExiting() const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::print(raw_ostream &O) const {
VPSlotTracker SlotTracker(getPlan());
print(O, "", SlotTracker);
}
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
if (getSuccessors().empty()) {
O << Indent << "No successors\n";
@@ -1471,58 +1478,6 @@ void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
}
#endif
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
RPOT(Region->getEntry());
for (VPBlockBase *Base : RPOT) {
visitBlock(Base, Old2New, IAI);
}
}
void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
for (VPRecipeBase &VPI : *VPBB) {
if (isa<VPWidenPHIRecipe>(&VPI))
continue;
assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
auto *VPInst = cast<VPInstruction>(&VPI);
auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
if (!Inst)
continue;
auto *IG = IAI.getInterleaveGroup(Inst);
if (!IG)
continue;
auto NewIGIter = Old2New.find(IG);
if (NewIGIter == Old2New.end())
Old2New[IG] = new InterleaveGroup<VPInstruction>(
IG->getFactor(), IG->isReverse(), IG->getAlign());
if (Inst == IG->getInsertPos())
Old2New[IG]->setInsertPos(VPInst);
InterleaveGroupMap[VPInst] = Old2New[IG];
InterleaveGroupMap[VPInst]->insertMember(
VPInst, IG->getIndex(Inst),
Align(IG->isReverse() ? (-1) * int(IG->getFactor())
: IG->getFactor()));
}
} else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
visitRegion(Region, Old2New, IAI);
else
llvm_unreachable("Unsupported kind of VPBlock.");
}
VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
InterleavedAccessInfo &IAI) {
Old2NewTy Old2New;
visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
}
void VPSlotTracker::assignName(const VPValue *V) {
assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
auto *UV = V->getUnderlyingValue();


@@ -17,7 +17,6 @@
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
/// instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//
@@ -34,10 +33,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
@@ -54,7 +50,7 @@ class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
struct VPTransformState;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
@@ -63,11 +59,11 @@ class VPBasicBlock;
class VPBuilder;
class VPRegionBlock;
class VPlan;
class VPLane;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;
struct VPCostContext;
@@ -75,318 +71,8 @@ namespace Intrinsic {
typedef unsigned ID;
}
/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() { return 2; }
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
// A power of 2.
const ElementCount Start;
// A power of 2. If End <= Start range is empty.
ElementCount End;
bool isEmpty() const {
return End.getKnownMinValue() <= Start.getKnownMinValue();
}
VFRange(const ElementCount &Start, const ElementCount &End)
: Start(Start), End(End) {
assert(Start.isScalable() == End.isScalable() &&
"Both Start and End should have the same scalable flag");
assert(isPowerOf2_32(Start.getKnownMinValue()) &&
"Expected Start to be a power of 2");
assert(isPowerOf2_32(End.getKnownMinValue()) &&
"Expected End to be a power of 2");
}
/// Iterator to iterate over vectorization factors in a VFRange.
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag,
ElementCount> {
ElementCount VF;
public:
iterator(ElementCount VF) : VF(VF) {}
bool operator==(const iterator &Other) const { return VF == Other.VF; }
ElementCount operator*() const { return VF; }
iterator &operator++() {
VF *= 2;
return *this;
}
};
iterator begin() { return iterator(Start); }
iterator end() {
assert(isPowerOf2_32(End.getKnownMinValue()));
return iterator(End);
}
};
using VPlanPtr = std::unique_ptr<VPlan>;
/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.
/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
/// Kind describes how to interpret Lane.
enum class Kind : uint8_t {
/// For First, Lane is the index into the first N elements of a
/// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
First,
/// For ScalableLast, Lane is the offset from the start of the last
/// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
/// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
/// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
ScalableLast
};
private:
/// in [0..VF)
unsigned Lane;
/// Indicates how the Lane should be interpreted, as described above.
Kind LaneKind;
public:
VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
"trying to extract with invalid offset");
unsigned LaneOffset = VF.getKnownMinValue() - Offset;
Kind LaneKind;
if (VF.isScalable())
// In this case 'LaneOffset' refers to the offset from the start of the
// last subvector with VF.getKnownMinValue() elements.
LaneKind = VPLane::Kind::ScalableLast;
else
LaneKind = VPLane::Kind::First;
return VPLane(LaneOffset, LaneKind);
}
static VPLane getLastLaneForVF(const ElementCount &VF) {
return getLaneFromEnd(VF, 1);
}
/// Returns a compile-time known value for the lane index and asserts if the
/// lane can only be calculated at runtime.
unsigned getKnownLane() const {
assert(LaneKind == Kind::First);
return Lane;
}
/// Returns an expression describing the lane index that can be used at
/// runtime.
Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
/// Returns the Kind of lane offset.
Kind getKind() const { return LaneKind; }
/// Returns true if this is the first lane of the whole vector.
bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
/// Maps the lane to a cache index based on \p VF.
unsigned mapToCacheIndex(const ElementCount &VF) const {
switch (LaneKind) {
case VPLane::Kind::ScalableLast:
assert(VF.isScalable() && Lane < VF.getKnownMinValue());
return VF.getKnownMinValue() + Lane;
default:
assert(Lane < VF.getKnownMinValue());
return Lane;
}
}
};
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan,
Loop *CurrentParentLoop, Type *CanonicalIVTy);
/// Target Transform Info.
const TargetTransformInfo *TTI;
/// The chosen Vectorization Factor of the loop being vectorized.
ElementCount VF;
/// Hold the index to generate specific scalar instructions. Null indicates
/// that all instances are to be generated, using either scalar or vector
/// instructions.
std::optional<VPLane> Lane;
struct DataState {
// Each value from the original loop, when vectorized, is represented by a
// vector value in the map.
DenseMap<VPValue *, Value *> VPV2Vector;
DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
} Data;
/// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
/// is false, otherwise return the generated scalar. \See set.
Value *get(VPValue *Def, bool IsScalar = false);
/// Get the generated Value for a given VPValue and given Part and Lane.
Value *get(VPValue *Def, const VPLane &Lane);
bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
bool hasScalarValue(VPValue *Def, VPLane Lane) {
auto I = Data.VPV2Scalars.find(Def);
if (I == Data.VPV2Scalars.end())
return false;
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
return CacheIdx < I->second.size() && I->second[CacheIdx];
}
/// Set the generated vector Value for a given VPValue, if \p
/// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
void set(VPValue *Def, Value *V, bool IsScalar = false) {
if (IsScalar) {
set(Def, V, VPLane(0));
return;
}
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
"scalar values must be stored as (0, 0)");
Data.VPV2Vector[Def] = V;
}
/// Reset an existing vector value for \p Def and a given \p Part.
void reset(VPValue *Def, Value *V) {
assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
Data.VPV2Vector[Def] = V;
}
/// Set the generated scalar \p V for \p Def and the given \p Lane.
void set(VPValue *Def, Value *V, const VPLane &Lane) {
auto &Scalars = Data.VPV2Scalars[Def];
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
if (Scalars.size() <= CacheIdx)
Scalars.resize(CacheIdx + 1);
assert(!Scalars[CacheIdx] && "should overwrite existing value");
Scalars[CacheIdx] = V;
}
/// Reset an existing scalar value for \p Def and a given \p Lane.
void reset(VPValue *Def, Value *V, const VPLane &Lane) {
auto Iter = Data.VPV2Scalars.find(Def);
assert(Iter != Data.VPV2Scalars.end() &&
"need to overwrite existing value");
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
assert(CacheIdx < Iter->second.size() &&
"need to overwrite existing value");
Iter->second[CacheIdx] = V;
}
/// Add additional metadata to \p To that was not present on \p Orig.
///
/// Currently this is used to add the noalias annotations based on the
/// inserted memchecks. Use this for instructions that are *cloned* into the
/// vector loop.
void addNewMetadata(Instruction *To, const Instruction *Orig);
/// Add metadata from one instruction to another.
///
/// This includes both the original MDs from \p From and additional ones (\see
/// addNewMetadata). Use this for *newly created* instructions in the vector
/// loop.
void addMetadata(Value *To, Instruction *From);
/// Set the debug location in the builder using the debug location \p DL.
void setDebugLocFrom(DebugLoc DL);
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
struct CFGState {
/// The previous VPBasicBlock visited. Initially set to null.
VPBasicBlock *PrevVPBB = nullptr;
/// The previous IR BasicBlock created or used. Initially set to the new
/// header BasicBlock.
BasicBlock *PrevBB = nullptr;
/// The last IR BasicBlock in the output IR. Set to the exit block of the
/// vector loop.
BasicBlock *ExitBB = nullptr;
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
/// Updater for the DominatorTree.
DomTreeUpdater DTU;
CFGState(DominatorTree *DT)
: DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
/// Returns the BasicBlock* mapped to the pre-header of the loop region
/// containing \p R.
BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
} CFG;
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
LoopInfo *LI;
/// Hold a reference to the IRBuilder used to generate output IR code.
IRBuilderBase &Builder;
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
/// The parent loop object for the current scope, or nullptr.
Loop *CurrentParentLoop = nullptr;
/// LoopVersioning. It's only set up (non-null) if memchecks were
/// used.
///
/// This is currently only used to add no-alias metadata based on the
/// memchecks. The actual versioning is performed manually.
LoopVersioning *LVer = nullptr;
/// Map SCEVs to their expanded values. Populated when executing
/// VPExpandSCEVRecipes.
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
/// VPlan-based type analysis.
VPTypeAnalysis TypeAnalysis;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
@@ -654,10 +340,7 @@ public:
VPSlotTracker &SlotTracker) const = 0;
/// Print plain-text dump of this VPlan to \p O.
void print(raw_ostream &O) const {
VPSlotTracker SlotTracker(getPlan());
print(O, "", SlotTracker);
}
void print(raw_ostream &O) const;
/// Print the successors of this block to \p O, prefixing all lines with \p
/// Indent.
@@ -673,34 +356,6 @@ public:
virtual VPBlockBase *clone() = 0;
};
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
Type *CanIVTy, LoopVectorizationCostModel &CM,
TargetTransformInfo::TargetCostKind CostKind)
: TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
CM(CM), CostKind(CostKind) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
/// Return true if the cost for \p UI shouldn't be computed, e.g. because it
/// has already been pre-computed.
bool skipCostComputation(Instruction *UI, bool IsVector) const;
/// Returns the OperandInfo for \p V, if it is a live-in.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
@@ -3671,12 +3326,12 @@ protected:
/// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
/// generated for this VPBB.
void connectToPredecessors(VPTransformState::CFGState &CFG);
void connectToPredecessors(VPTransformState &State);
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
BasicBlock *createEmptyBasicBlock(VPTransformState &State);
};
/// A special type of VPBasicBlock that wraps an existing IR basic block.
@@ -4146,55 +3801,6 @@ public:
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
raw_ostream &OS;
const VPlan &Plan;
unsigned Depth = 0;
unsigned TabWidth = 2;
std::string Indent;
unsigned BID = 0;
SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
VPSlotTracker SlotTracker;
/// Handle indentation.
void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
/// Print a given \p Block of the Plan.
void dumpBlock(const VPBlockBase *Block);
/// Print the information related to the CFG edges going out of a given
/// \p Block, followed by printing the successor blocks themselves.
void dumpEdges(const VPBlockBase *Block);
/// Print a given \p BasicBlock, including its VPRecipes, followed by printing
/// its successor blocks.
void dumpBasicBlock(const VPBasicBlock *BasicBlock);
/// Print a given \p Region of the Plan.
void dumpRegion(const VPRegionBlock *Region);
unsigned getOrCreateBID(const VPBlockBase *Block) {
return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
}
Twine getOrCreateName(const VPBlockBase *Block);
Twine getUID(const VPBlockBase *Block);
/// Print the information related to a CFG edge between two VPBlockBases.
void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
const Twine &Label);
public:
VPlanPrinter(raw_ostream &O, const VPlan &P)
: OS(O), Plan(P), SlotTracker(&P) {}
LLVM_DUMP_METHOD void dump();
};
struct VPlanIngredient {
const Value *V;
@@ -4214,139 +3820,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
}
#endif
class VPInterleavedAccessInfo {
DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
InterleaveGroupMap;
/// Type for mapping of instruction based interleave groups to VPInstruction
/// interleave groups
using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
InterleaveGroup<VPInstruction> *>;
/// Recursively traverse \p Region and populate VPlan based interleave groups
/// based on \p IAI.
void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
InterleavedAccessInfo &IAI);
/// Recursively traverse \p Block and populate VPlan based interleave groups
/// based on \p IAI.
void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
InterleavedAccessInfo &IAI);
public:
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
~VPInterleavedAccessInfo() {
SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
// Avoid releasing a pointer twice.
for (auto &I : InterleaveGroupMap)
DelSet.insert(I.second);
for (auto *Ptr : DelSet)
delete Ptr;
}
/// Get the interleave group that \p Instr belongs to.
///
/// \returns nullptr if \p Instr doesn't belong to such a group.
InterleaveGroup<VPInstruction> *
getInterleaveGroup(VPInstruction *Instr) const {
return InterleaveGroupMap.lookup(Instr);
}
};
/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
enum class OpMode { Failed, Load, Opcode };
/// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
/// DenseMap keys.
struct BundleDenseMapInfo {
static SmallVector<VPValue *, 4> getEmptyKey() {
return {reinterpret_cast<VPValue *>(-1)};
}
static SmallVector<VPValue *, 4> getTombstoneKey() {
return {reinterpret_cast<VPValue *>(-2)};
}
static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
const SmallVector<VPValue *, 4> &RHS) {
return LHS == RHS;
}
};
/// Mapping of values in the original VPlan to a combined VPInstruction.
DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
BundleToCombined;
VPInterleavedAccessInfo &IAI;
/// Basic block to operate on. For now, only instructions in a single BB are
/// considered.
const VPBasicBlock &BB;
/// Indicates whether we managed to combine all visited instructions or not.
bool CompletelySLP = true;
/// Width of the widest combined bundle in bits.
unsigned WidestBundleBits = 0;
using MultiNodeOpTy =
typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
// Input operand bundles for the current multi node. Each multi node operand
// bundle contains values not matching the multi node's opcode. They will
// be reordered in reorderMultiNodeOps, once we completed building a
// multi node.
SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
/// Indicates whether we are building a multi node currently.
bool MultiNodeActive = false;
/// Check if we can vectorize Operands together.
bool areVectorizable(ArrayRef<VPValue *> Operands) const;
/// Add combined instruction \p New for the bundle \p Operands.
void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
/// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
VPInstruction *markFailed();
/// Reorder operands in the multi node to maximize sequential memory access
/// and commutative operations.
SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
/// Choose the best candidate to use for the lane after \p Last. The set of
/// candidates to choose from are values with an opcode matching \p Last's
/// or loads consecutive to \p Last.
std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
SmallPtrSetImpl<VPValue *> &Candidates,
VPInterleavedAccessInfo &IAI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print bundle \p Values to dbgs().
void dumpBundle(ArrayRef<VPValue *> Values);
#endif
public:
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
~VPlanSlp() = default;
/// Tries to build an SLP tree rooted at \p Operands and returns a
/// VPInstruction combining \p Operands, if they can be combined.
VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
/// Return the width of the widest combined bundle in bits.
unsigned getWidestBundleBits() const { return WidestBundleBits; }
/// Return true if all visited instructions can be combined.
bool isCompletelySLP() const { return CompletelySLP; }
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H


@@ -0,0 +1,468 @@
//===- VPlanHelpers.h - VPlan-related auxiliary helpers -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of different VPlan-related auxiliary
/// helpers.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
#include "VPlanAnalysis.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class Value;
class LoopVersioning;
/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() { return 2; }
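// Editor's illustration (not part of this patch): the reciprocal is typically
// used to scale the cost of a block that executes only under a predicate,
// assuming the block runs once every other iteration:
//
//   InstructionCost PredBlockCost = BlockCost / getReciprocalPredBlockProb();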
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
// A power of 2.
const ElementCount Start;
// A power of 2. If End <= Start range is empty.
ElementCount End;
bool isEmpty() const {
return End.getKnownMinValue() <= Start.getKnownMinValue();
}
VFRange(const ElementCount &Start, const ElementCount &End)
: Start(Start), End(End) {
assert(Start.isScalable() == End.isScalable() &&
"Both Start and End should have the same scalable flag");
assert(isPowerOf2_32(Start.getKnownMinValue()) &&
"Expected Start to be a power of 2");
assert(isPowerOf2_32(End.getKnownMinValue()) &&
"Expected End to be a power of 2");
}
/// Iterator to iterate over vectorization factors in a VFRange.
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag,
ElementCount> {
ElementCount VF;
public:
iterator(ElementCount VF) : VF(VF) {}
bool operator==(const iterator &Other) const { return VF == Other.VF; }
ElementCount operator*() const { return VF; }
iterator &operator++() {
VF *= 2;
return *this;
}
};
iterator begin() { return iterator(Start); }
iterator end() {
assert(isPowerOf2_32(End.getKnownMinValue()));
return iterator(End);
}
};
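// Editor's illustration (not part of this patch): a VFRange can be iterated
// like a container; it visits the powers of two in [Start, End), e.g.:
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : Range)
//     dbgs() << VF << " "; // prints 1, 2, 4, 8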
/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.
/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
/// Kind describes how to interpret Lane.
enum class Kind : uint8_t {
/// For First, Lane is the index into the first N elements of a
/// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
First,
/// For ScalableLast, Lane is the offset from the start of the last
/// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
/// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
/// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
ScalableLast
};
private:
/// in [0..VF)
unsigned Lane;
/// Indicates how the Lane should be interpreted, as described above.
Kind LaneKind = Kind::First;
public:
VPLane(unsigned Lane) : Lane(Lane) {}
VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
"trying to extract with invalid offset");
unsigned LaneOffset = VF.getKnownMinValue() - Offset;
Kind LaneKind;
if (VF.isScalable())
// In this case 'LaneOffset' refers to the offset from the start of the
// last subvector with VF.getKnownMinValue() elements.
LaneKind = VPLane::Kind::ScalableLast;
else
LaneKind = VPLane::Kind::First;
return VPLane(LaneOffset, LaneKind);
}
static VPLane getLastLaneForVF(const ElementCount &VF) {
return getLaneFromEnd(VF, 1);
}
/// Returns a compile-time known value for the lane index and asserts if the
/// lane can only be calculated at runtime.
unsigned getKnownLane() const {
assert(LaneKind == Kind::First &&
"can only get known lane from the beginning");
return Lane;
}
/// Returns an expression describing the lane index that can be used at
/// runtime.
Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
/// Returns the Kind of lane offset.
Kind getKind() const { return LaneKind; }
/// Returns true if this is the first lane of the whole vector.
bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
/// Maps the lane to a cache index based on \p VF.
unsigned mapToCacheIndex(const ElementCount &VF) const {
switch (LaneKind) {
case VPLane::Kind::ScalableLast:
assert(VF.isScalable() && Lane < VF.getKnownMinValue() &&
"ScalableLast can only be used with scalable VFs");
return VF.getKnownMinValue() + Lane;
default:
assert(Lane < VF.getKnownMinValue() &&
"Cannot extract lane larger than VF");
return Lane;
}
}
};
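// Editor's illustration (not part of this patch): for a scalable VF of
// <vscale x 4 x i32>, asking for the last lane yields a ScalableLast lane:
//
//   VPLane L = VPLane::getLaneFromEnd(ElementCount::getScalable(4), /*Offset=*/1);
//   // L.getKnownLane() would assert; the lane is (vscale - 1) * 4 + 3 at runtime.
//   unsigned Idx = L.mapToCacheIndex(ElementCount::getScalable(4)); // 4 + 3 == 7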
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan,
Loop *CurrentParentLoop, Type *CanonicalIVTy);
/// Target Transform Info.
const TargetTransformInfo *TTI;
/// The chosen Vectorization Factor of the loop being vectorized.
ElementCount VF;
/// Hold the index to generate specific scalar instructions. Null indicates
/// that all instances are to be generated, using either scalar or vector
/// instructions.
std::optional<VPLane> Lane;
struct DataState {
// Each value from the original loop, when vectorized, is represented by a
// vector value in the map.
DenseMap<VPValue *, Value *> VPV2Vector;
DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
} Data;
/// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
/// is false, otherwise return the generated scalar. \See set.
Value *get(VPValue *Def, bool IsScalar = false);
/// Get the generated Value for a given VPValue and given Part and Lane.
Value *get(VPValue *Def, const VPLane &Lane);
bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
bool hasScalarValue(VPValue *Def, VPLane Lane) {
auto I = Data.VPV2Scalars.find(Def);
if (I == Data.VPV2Scalars.end())
return false;
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
return CacheIdx < I->second.size() && I->second[CacheIdx];
}
/// Set the generated vector Value for a given VPValue, if \p
/// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
void set(VPValue *Def, Value *V, bool IsScalar = false) {
if (IsScalar) {
set(Def, V, VPLane(0));
return;
}
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
"scalar values must be stored as (0, 0)");
Data.VPV2Vector[Def] = V;
}
/// Reset an existing vector value for \p Def and a given \p Part.
void reset(VPValue *Def, Value *V) {
assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
Data.VPV2Vector[Def] = V;
}
/// Set the generated scalar \p V for \p Def and the given \p Lane.
void set(VPValue *Def, Value *V, const VPLane &Lane) {
auto &Scalars = Data.VPV2Scalars[Def];
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
if (Scalars.size() <= CacheIdx)
Scalars.resize(CacheIdx + 1);
assert(!Scalars[CacheIdx] && "should overwrite existing value");
Scalars[CacheIdx] = V;
}
/// Reset an existing scalar value for \p Def and a given \p Lane.
void reset(VPValue *Def, Value *V, const VPLane &Lane) {
auto Iter = Data.VPV2Scalars.find(Def);
assert(Iter != Data.VPV2Scalars.end() &&
"need to overwrite existing value");
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
assert(CacheIdx < Iter->second.size() &&
"need to overwrite existing value");
Iter->second[CacheIdx] = V;
}
/// Add additional metadata to \p To that was not present on \p Orig.
///
/// Currently this is used to add the noalias annotations based on the
/// inserted memchecks. Use this for instructions that are *cloned* into the
/// vector loop.
void addNewMetadata(Instruction *To, const Instruction *Orig);
/// Add metadata from one instruction to another.
///
/// This includes both the original MDs from \p From and additional ones (\see
/// addNewMetadata). Use this for *newly created* instructions in the vector
/// loop.
void addMetadata(Value *To, Instruction *From);
/// Set the debug location in the builder using the debug location \p DL.
void setDebugLocFrom(DebugLoc DL);
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
struct CFGState {
/// The previous VPBasicBlock visited. Initially set to null.
VPBasicBlock *PrevVPBB = nullptr;
/// The previous IR BasicBlock created or used. Initially set to the new
/// header BasicBlock.
BasicBlock *PrevBB = nullptr;
/// The last IR BasicBlock in the output IR. Set to the exit block of the
/// vector loop.
BasicBlock *ExitBB = nullptr;
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
/// Updater for the DominatorTree.
DomTreeUpdater DTU;
CFGState(DominatorTree *DT)
: DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
/// Returns the BasicBlock* mapped to the pre-header of the loop region
/// containing \p R.
BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
} CFG;
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
LoopInfo *LI;
/// Hold a reference to the IRBuilder used to generate output IR code.
IRBuilderBase &Builder;
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
/// The parent loop object for the current scope, or nullptr.
Loop *CurrentParentLoop = nullptr;
/// LoopVersioning. It's only set up (non-null) if memchecks were
/// used.
///
/// This is currently only used to add no-alias metadata based on the
/// memchecks. The actual versioning is performed manually.
LoopVersioning *LVer = nullptr;
/// Map SCEVs to their expanded values. Populated when executing
/// VPExpandSCEVRecipes.
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
/// VPlan-based type analysis.
VPTypeAnalysis TypeAnalysis;
};
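// Editor's illustration (not part of this patch): generated values are cached
// per VPValue, either as a whole vector or per lane. A hypothetical recipe
// producing a scalar for lane 2 at a fixed VF of 4 would do roughly:
//
//   State.set(Def, ScalarV, VPLane(2));                 // cached at index 2
//   Value *V = State.get(Def, VPLane(2));               // returns ScalarV
//   bool Cached = State.hasScalarValue(Def, VPLane(2)); // true afterwards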
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
Type *CanIVTy, LoopVectorizationCostModel &CM,
TargetTransformInfo::TargetCostKind CostKind)
: TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
CM(CM), CostKind(CostKind) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
/// Return true if the cost for \p UI shouldn't be computed, e.g. because it
/// has already been pre-computed.
bool skipCostComputation(Instruction *UI, bool IsVector) const;
/// Returns the OperandInfo for \p V, if it is a live-in.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
};
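// Editor's illustration (not part of this patch): a recipe's cost computation
// can consult the context roughly like this (Ctx, UI and VF are placeholders):
//
//   if (!Ctx.skipCostComputation(UI, VF.isVector()))
//     Cost += Ctx.getLegacyCost(UI, VF);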
/// This class can be used to assign names to VPValues. For VPValues without
/// underlying value, assign consecutive numbers and use those as names (wrapped
/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
/// ir<>), appending a .V version number if there are multiple uses of the same
/// name. Allows querying names for VPValues for printing, similar to the
/// ModuleSlotTracker for IR values.
class VPSlotTracker {
/// Keep track of versioned names assigned to VPValues with underlying IR
/// values.
DenseMap<const VPValue *, std::string> VPValue2Name;
/// Keep track of the next number to use to version the base name.
StringMap<unsigned> BaseName2Version;
/// Number to assign to the next VPValue without underlying value.
unsigned NextSlot = 0;
void assignName(const VPValue *V);
void assignNames(const VPlan &Plan);
void assignNames(const VPBasicBlock *VPBB);
public:
VPSlotTracker(const VPlan *Plan = nullptr) {
if (Plan)
assignNames(*Plan);
}
/// Returns the name assigned to \p V, if there is one, otherwise try to
/// construct one from the underlying value, if there's one; else return
/// <badref>.
std::string getOrCreateName(const VPValue *V) const;
};
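// Editor's illustration (not part of this patch): typical use when printing,
// assuming Plan is a fully constructed VPlan and R a recipe inside it:
//
//   VPSlotTracker SlotTracker(&Plan);
//   R.print(dbgs(), "", SlotTracker); // values render as vp<%N> or ir<%name>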
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
raw_ostream &OS;
const VPlan &Plan;
unsigned Depth = 0;
unsigned TabWidth = 2;
std::string Indent;
unsigned BID = 0;
SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
VPSlotTracker SlotTracker;
/// Handle indentation.
void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
/// Print a given \p Block of the Plan.
void dumpBlock(const VPBlockBase *Block);
/// Print the information related to the CFG edges going out of a given
/// \p Block, followed by printing the successor blocks themselves.
void dumpEdges(const VPBlockBase *Block);
/// Print a given \p BasicBlock, including its VPRecipes, followed by printing
/// its successor blocks.
void dumpBasicBlock(const VPBasicBlock *BasicBlock);
/// Print a given \p Region of the Plan.
void dumpRegion(const VPRegionBlock *Region);
unsigned getOrCreateBID(const VPBlockBase *Block) {
return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
}
Twine getOrCreateName(const VPBlockBase *Block);
Twine getUID(const VPBlockBase *Block);
/// Print the information related to a CFG edge between two VPBlockBases.
void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
const Twine &Label);
public:
VPlanPrinter(raw_ostream &O, const VPlan &P)
: OS(O), Plan(P), SlotTracker(&P) {}
LLVM_DUMP_METHOD void dump();
};
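// Editor's illustration (not part of this patch): emitting a plan in dot
// format, assuming Plan is a fully constructed VPlan:
//
//   VPlanPrinter Printer(dbgs(), Plan);
//   Printer.dump(); // prints a digraph describing the plan's blocks and edges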
#endif
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H


@@ -14,12 +14,14 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"


@@ -14,10 +14,13 @@
///
//===----------------------------------------------------------------------===//
#include "VPlanSLP.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -39,6 +42,57 @@ using namespace llvm;
// Number of levels to look ahead when re-ordering multi node operands.
static unsigned LookaheadMaxDepth = 5;
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Region->getEntry());
for (VPBlockBase *Base : RPOT) {
visitBlock(Base, Old2New, IAI);
}
}
void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
for (VPRecipeBase &VPI : *VPBB) {
if (isa<VPWidenPHIRecipe>(&VPI))
continue;
auto *VPInst = cast<VPInstruction>(&VPI);
auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
if (!Inst)
continue;
auto *IG = IAI.getInterleaveGroup(Inst);
if (!IG)
continue;
auto NewIGIter = Old2New.find(IG);
if (NewIGIter == Old2New.end())
Old2New[IG] = new InterleaveGroup<VPInstruction>(
IG->getFactor(), IG->isReverse(), IG->getAlign());
if (Inst == IG->getInsertPos())
Old2New[IG]->setInsertPos(VPInst);
InterleaveGroupMap[VPInst] = Old2New[IG];
InterleaveGroupMap[VPInst]->insertMember(
VPInst, IG->getIndex(Inst),
Align(IG->isReverse() ? (-1) * int(IG->getFactor())
: IG->getFactor()));
}
} else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) {
visitRegion(Region, Old2New, IAI);
} else {
llvm_unreachable("Unsupported kind of VPBlock.");
}
}
VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
InterleavedAccessInfo &IAI) {
Old2NewTy Old2New;
visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
}
VPInstruction *VPlanSlp::markFailed() {
// FIXME: Currently this is used to signal we hit instructions we cannot
// trivially SLP'ize.


@@ -0,0 +1,166 @@
//===- VPlanSLP.h - VPlan-based SLP ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations for VPlan-based SLP.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
namespace llvm {
class VPBasicBlock;
class VPBlockBase;
class VPRegionBlock;
class VPlan;
class VPValue;
class VPInstruction;
class VPInterleavedAccessInfo {
DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
InterleaveGroupMap;
/// Type for mapping of instruction based interleave groups to VPInstruction
/// interleave groups
using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
InterleaveGroup<VPInstruction> *>;
/// Recursively traverse \p Region and populate VPlan based interleave groups
/// based on \p IAI.
void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
InterleavedAccessInfo &IAI);
/// Recursively traverse \p Block and populate VPlan based interleave groups
/// based on \p IAI.
void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
InterleavedAccessInfo &IAI);
public:
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
~VPInterleavedAccessInfo() {
SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
// Avoid releasing a pointer twice.
for (auto &I : InterleaveGroupMap)
DelSet.insert(I.second);
for (auto *Ptr : DelSet)
delete Ptr;
}
/// Get the interleave group that \p Instr belongs to.
///
/// \returns nullptr if \p Instr doesn't belong to such a group.
InterleaveGroup<VPInstruction> *
getInterleaveGroup(VPInstruction *Instr) const {
return InterleaveGroupMap.lookup(Instr);
}
};
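// Editor's illustration (not part of this patch): building the VPlan-level
// view of interleave groups from an existing InterleavedAccessInfo IAI:
//
//   VPInterleavedAccessInfo VPIAI(Plan, IAI);
//   if (auto *IG = VPIAI.getInterleaveGroup(VPInst))
//     dbgs() << "interleave factor: " << IG->getFactor() << "\n";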
/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
enum class OpMode { Failed, Load, Opcode };
/// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
/// DenseMap keys.
struct BundleDenseMapInfo {
static SmallVector<VPValue *, 4> getEmptyKey() {
return {reinterpret_cast<VPValue *>(-1)};
}
static SmallVector<VPValue *, 4> getTombstoneKey() {
return {reinterpret_cast<VPValue *>(-2)};
}
static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
const SmallVector<VPValue *, 4> &RHS) {
return LHS == RHS;
}
};
/// Mapping of values in the original VPlan to a combined VPInstruction.
DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
BundleToCombined;
VPInterleavedAccessInfo &IAI;
/// Basic block to operate on. For now, only instructions in a single BB are
/// considered.
const VPBasicBlock &BB;
/// Indicates whether we managed to combine all visited instructions or not.
bool CompletelySLP = true;
/// Width of the widest combined bundle in bits.
unsigned WidestBundleBits = 0;
using MultiNodeOpTy =
typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
// Input operand bundles for the current multi node. Each multi node operand
// bundle contains values not matching the multi node's opcode. They will
// be reordered in reorderMultiNodeOps, once we completed building a
// multi node.
SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
/// Indicates whether we are building a multi node currently.
bool MultiNodeActive = false;
/// Check if we can vectorize Operands together.
bool areVectorizable(ArrayRef<VPValue *> Operands) const;
/// Add combined instruction \p New for the bundle \p Operands.
void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
/// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
VPInstruction *markFailed();
/// Reorder operands in the multi node to maximize sequential memory access
/// and commutative operations.
SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
/// Choose the best candidate to use for the lane after \p Last. The set of
/// candidates to choose from are values with an opcode matching \p Last's
/// or loads consecutive to \p Last.
std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
SmallPtrSetImpl<VPValue *> &Candidates,
VPInterleavedAccessInfo &IAI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print bundle \p Values to dbgs().
void dumpBundle(ArrayRef<VPValue *> Values);
#endif
public:
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
~VPlanSlp() = default;
/// Tries to build an SLP tree rooted at \p Operands and returns a
/// VPInstruction combining \p Operands, if they can be combined.
VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
/// Return the width of the widest combined bundle in bits.
unsigned getWidestBundleBits() const { return WidestBundleBits; }
/// Return true if all visited instruction can be combined.
bool isCompletelySLP() const { return CompletelySLP; }
};
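// Editor's illustration (not part of this patch): trying to combine the
// operands of adjacent stores in a VPBasicBlock (all names are placeholders):
//
//   VPlanSlp Slp(VPIAI, *VPBB);
//   if (VPInstruction *Combined = Slp.buildGraph(StoreOperands))
//     dbgs() << "combined bundle of " << Slp.getWidestBundleBits() << " bits\n";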
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H


@@ -25,6 +25,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"


@@ -435,41 +435,6 @@ public:
#endif
};
class VPlan;
class VPBasicBlock;
/// This class can be used to assign names to VPValues. For VPValues without
/// underlying value, assign consecutive numbers and use those as names (wrapped
/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
/// ir<>), appending a .V version number if there are multiple uses of the same
/// name. Allows querying names for VPValues for printing, similar to the
/// ModuleSlotTracker for IR values.
class VPSlotTracker {
/// Keep track of versioned names assigned to VPValues with underlying IR
/// values.
DenseMap<const VPValue *, std::string> VPValue2Name;
/// Keep track of the next number to use to version the base name.
StringMap<unsigned> BaseName2Version;
/// Number to assign to the next VPValue without underlying value.
unsigned NextSlot = 0;
void assignName(const VPValue *V);
void assignNames(const VPlan &Plan);
void assignNames(const VPBasicBlock *VPBB);
public:
VPSlotTracker(const VPlan *Plan = nullptr) {
if (Plan)
assignNames(*Plan);
}
/// Returns the name assigned to \p V, if there is one, otherwise try to
/// construct one from the underlying value, if there's one; else return
/// <badref>.
std::string getOrCreateName(const VPValue *V) const;
};
} // namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_VALUE_H


@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
#include "../lib/Transforms/Vectorize/VPlanSLP.h"
#include "../lib/Transforms/Vectorize/VPlan.h"
#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
#include "VPlanTestBase.h"


@@ -9,6 +9,7 @@
#include "../lib/Transforms/Vectorize/VPlan.h"
#include "../lib/Transforms/Vectorize/VPlanCFG.h"
#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
#include "VPlanTestBase.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"