mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-18 18:56:43 +00:00
[VPlan] Only execute VPExpandSCEVRecipes once and remove them (NFC).
Instead of executing the whole entry VPIRBB twice, first only execute the VPExpandSCEVRecipes and replace their uses with the expanded VPValue, which will be a live-in. This allows removing special logic in VPExpandSCEVRecipe to support executing twice and allows moving the ExpandedSCEVs map out of VPTransformState. It will also allow adding other recipes to the entry VPBB in the future.
This commit is contained in:
parent
2fe7585686
commit
c482b8faea
@ -7695,8 +7695,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
|
||||
|
||||
// 0. Generate SCEV-dependent code in the entry, including TripCount, before
|
||||
// making any changes to the CFG.
|
||||
if (!BestVPlan.getEntry()->empty())
|
||||
BestVPlan.getEntry()->execute(&State);
|
||||
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
|
||||
auto *Entry = cast<VPIRBasicBlock>(BestVPlan.getEntry());
|
||||
State.Builder.SetInsertPoint(Entry->getIRBasicBlock()->getTerminator());
|
||||
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
|
||||
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
|
||||
if (!ExpSCEV)
|
||||
continue;
|
||||
ExpSCEV->execute(State);
|
||||
ExpandedSCEVs[ExpSCEV->getSCEV()] = State.get(ExpSCEV, VPLane(0));
|
||||
VPValue *Exp = BestVPlan.getOrAddLiveIn(ExpandedSCEVs[ExpSCEV->getSCEV()]);
|
||||
ExpSCEV->replaceAllUsesWith(Exp);
|
||||
if (BestVPlan.getTripCount() == ExpSCEV)
|
||||
BestVPlan.resetTripCount(Exp);
|
||||
ExpSCEV->eraseFromParent();
|
||||
}
|
||||
|
||||
if (!ILV.getTripCount())
|
||||
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
|
||||
@ -7706,9 +7719,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
|
||||
|
||||
// 1. Set up the skeleton for vectorization, including vector pre-header and
|
||||
// middle block. The vector loop is created during VPlan execution.
|
||||
VPBasicBlock *VectorPH =
|
||||
cast<VPBasicBlock>(BestVPlan.getEntry()->getSingleSuccessor());
|
||||
|
||||
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
|
||||
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
|
||||
if (VectorizingEpilogue)
|
||||
VPlanTransforms::removeDeadRecipes(BestVPlan);
|
||||
@ -7821,7 +7832,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
|
||||
}
|
||||
}
|
||||
|
||||
return State.ExpandedSCEVs;
|
||||
return ExpandedSCEVs;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
@ -355,10 +355,6 @@ struct VPTransformState {
|
||||
/// memchecks. The actual versioning is performed manually.
|
||||
LoopVersioning *LVer = nullptr;
|
||||
|
||||
/// Map SCEVs to their expanded values. Populated when executing
|
||||
/// VPExpandSCEVRecipes.
|
||||
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
|
||||
|
||||
/// VPlan-based type analysis.
|
||||
VPTypeAnalysis TypeAnalysis;
|
||||
|
||||
|
@ -3444,23 +3444,10 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
|
||||
|
||||
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
|
||||
assert(!State.Lane && "cannot be used in per-lane");
|
||||
if (State.ExpandedSCEVs.contains(Expr)) {
|
||||
// SCEV Expr has already been expanded, result must already be set. At the
|
||||
// moment we have to execute the entry block twice (once before skeleton
|
||||
// creation to get expanded SCEVs used by the skeleton and once during
|
||||
// regular VPlan execution).
|
||||
State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
|
||||
assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
|
||||
"Results must match");
|
||||
return;
|
||||
}
|
||||
|
||||
const DataLayout &DL = SE.getDataLayout();
|
||||
SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true);
|
||||
|
||||
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
|
||||
&*State.Builder.GetInsertPoint());
|
||||
State.ExpandedSCEVs[Expr] = Res;
|
||||
State.set(this, Res, VPLane(0));
|
||||
}
|
||||
|
||||
|
@ -152,11 +152,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
|
||||
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
|
||||
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
|
||||
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
|
||||
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
|
||||
; CHECK-NEXT: ir<%0> = original trip-count
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<for.body.preheader>:
|
||||
; CHECK-NEXT: IR %0 = zext i32 %n to i64
|
||||
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
|
||||
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<vector.scevcheck>:
|
||||
@ -213,7 +212,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
|
||||
; CHECK-NEXT: Successor(s): middle.block
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: middle.block:
|
||||
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
|
||||
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
|
||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
|
||||
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
|
||||
; CHECK-EMPTY:
|
||||
@ -402,11 +401,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
|
||||
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
|
||||
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
|
||||
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
|
||||
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
|
||||
; CHECK-NEXT: ir<%0> = original trip-count
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<for.body.preheader>:
|
||||
; CHECK-NEXT: IR %0 = zext i32 %n to i64
|
||||
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
|
||||
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<vector.scevcheck>:
|
||||
@ -463,7 +461,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
|
||||
; CHECK-NEXT: Successor(s): middle.block
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: middle.block:
|
||||
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
|
||||
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
|
||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
|
||||
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
|
||||
; CHECK-EMPTY:
|
||||
|
@ -16,12 +16,12 @@ define i8 @widget(ptr %arr, i8 %t9) {
|
||||
; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
|
||||
; CHECK-NEXT: [[T1_0_LCSSA4:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
|
||||
; CHECK-NEXT: [[T1_0_LCSSA1:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
|
||||
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
|
||||
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARR1]] to i32
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[TMP0]]
|
||||
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.scevcheck:
|
||||
|
@ -5,10 +5,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK: VPlan 'Final VPlan for VF={2},UF={1}' {
|
||||
; CHECK-NEXT: Live-in ir<[[VFxUF:.+]]> = VF * UF
|
||||
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
|
||||
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
|
||||
; CHECK-NEXT: ir<%0> = original trip-count
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<entry>:
|
||||
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
|
||||
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<vector.ph>:
|
||||
@ -86,7 +85,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: Successor(s): middle.block
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: middle.block:
|
||||
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
|
||||
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]>
|
||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
|
||||
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
|
||||
; CHECK-EMPTY:
|
||||
|
@ -62,11 +62,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
|
||||
; CHECK: Executing best plan with VF=8, UF=2
|
||||
; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' {
|
||||
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
|
||||
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
|
||||
; CHECK-NEXT: ir<%and> = original trip-count
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<entry>:
|
||||
; CHECK-NEXT: IR %and = and i64 %N, 15
|
||||
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
|
||||
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ir-bb<vector.ph>:
|
||||
@ -92,7 +91,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
|
||||
; CHECK-NEXT: Successor(s): middle.block
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: middle.block:
|
||||
; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
|
||||
; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<%and>, ir<[[VTC]]>
|
||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]>
|
||||
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
|
||||
; CHECK-EMPTY:
|
||||
|
Loading…
x
Reference in New Issue
Block a user