mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 08:06:40 +00:00
[LoopVectorize] Vectorize select-cmp reduction pattern for increasing integer induction variable (#67812)
Consider the following loop: ``` int rdx = init; for (int i = 0; i < n; ++i) rdx = (a[i] > b[i]) ? i : rdx; ``` We can vectorize this loop if `i` is an increasing induction variable. The final reduced value will be the maximum value of `i` for which the condition `a[i] > b[i]` is satisfied, or the start value `init` if the condition is never satisfied. This patch adds two new RecurKind enumerators: IFindLastIV and FFindLastIV. --------- Co-authored-by: Alexey Bataev <5361294+alexey-bataev@users.noreply.github.com>
This commit is contained in:
parent
0876c11cee
commit
b3cba9be41
@ -50,9 +50,16 @@ enum class RecurKind {
|
||||
FMulAdd, ///< Sum of float products with llvm.fmuladd(a * b + sum).
|
||||
IAnyOf, ///< Any_of reduction with select(icmp(),x,y) where one of (x,y) is
|
||||
///< loop invariant, and both x and y are integer type.
|
||||
FAnyOf ///< Any_of reduction with select(fcmp(),x,y) where one of (x,y) is
|
||||
FAnyOf, ///< Any_of reduction with select(fcmp(),x,y) where one of (x,y) is
|
||||
///< loop invariant, and both x and y are integer type.
|
||||
// TODO: Any_of reduction need not be restricted to integer type only.
|
||||
IFindLastIV, ///< FindLast reduction with select(icmp(),x,y) where one of
|
||||
///< (x,y) is increasing loop induction, and both x and y are
|
||||
///< integer type.
|
||||
FFindLastIV ///< FindLast reduction with select(fcmp(),x,y) where one of (x,y)
|
||||
///< is increasing loop induction, and both x and y are integer
|
||||
///< type.
|
||||
// TODO: Any_of and FindLast reduction need not be restricted to integer type
|
||||
// only.
|
||||
};
|
||||
|
||||
/// The RecurrenceDescriptor is used to identify recurrences variables in a
|
||||
@ -124,7 +131,7 @@ public:
|
||||
/// the returned struct.
|
||||
static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I,
|
||||
RecurKind Kind, InstDesc &Prev,
|
||||
FastMathFlags FuncFMF);
|
||||
FastMathFlags FuncFMF, ScalarEvolution *SE);
|
||||
|
||||
/// Returns true if instruction I has multiple uses in Insts
|
||||
static bool hasMultipleUsesOf(Instruction *I,
|
||||
@ -151,6 +158,16 @@ public:
|
||||
static InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I,
|
||||
InstDesc &Prev);
|
||||
|
||||
/// Returns a struct describing whether the instruction is either a
|
||||
/// Select(ICmp(A, B), X, Y), or
|
||||
/// Select(FCmp(A, B), X, Y)
|
||||
/// where one of (X, Y) is an increasing loop induction variable, and the
|
||||
/// other is a PHI value.
|
||||
// TODO: Support non-monotonic variables. FindLast need not be restricted
|
||||
// to increasing loop induction variables.
|
||||
static InstDesc isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
|
||||
Instruction *I, ScalarEvolution &SE);
|
||||
|
||||
/// Returns a struct describing if the instruction is a
|
||||
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
|
||||
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
|
||||
@ -236,10 +253,25 @@ public:
|
||||
return Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf;
|
||||
}
|
||||
|
||||
/// Returns true if the recurrence kind is of the form
|
||||
/// select(cmp(),x,y) where one of (x,y) is increasing loop induction.
|
||||
static bool isFindLastIVRecurrenceKind(RecurKind Kind) {
|
||||
return Kind == RecurKind::IFindLastIV || Kind == RecurKind::FFindLastIV;
|
||||
}
|
||||
|
||||
/// Returns the type of the recurrence. This type can be narrower than the
|
||||
/// actual type of the Phi if the recurrence has been type-promoted.
|
||||
Type *getRecurrenceType() const { return RecurrenceType; }
|
||||
|
||||
/// Returns the sentinel value for FindLastIV recurrences to replace the start
|
||||
/// value.
|
||||
Value *getSentinelValue() const {
|
||||
assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
|
||||
Type *Ty = StartValue->getType();
|
||||
return ConstantInt::get(Ty,
|
||||
APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
|
||||
}
|
||||
|
||||
/// Returns a reference to the instructions used for type-promoting the
|
||||
/// recurrence.
|
||||
const SmallPtrSet<Instruction *, 8> &getCastInsts() const { return CastInsts; }
|
||||
|
@ -419,6 +419,12 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
|
||||
const RecurrenceDescriptor &Desc,
|
||||
PHINode *OrigPhi);
|
||||
|
||||
/// Create a reduction of the given vector \p Src for a reduction of the
|
||||
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
|
||||
/// operation is described by \p Desc.
|
||||
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
|
||||
const RecurrenceDescriptor &Desc);
|
||||
|
||||
/// Create a generic reduction using a recurrence descriptor \p Desc
|
||||
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
|
||||
Value *createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
|
||||
|
@ -51,6 +51,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
|
||||
case RecurKind::UMin:
|
||||
case RecurKind::IAnyOf:
|
||||
case RecurKind::FAnyOf:
|
||||
case RecurKind::IFindLastIV:
|
||||
case RecurKind::FFindLastIV:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -372,7 +374,7 @@ bool RecurrenceDescriptor::AddReductionVar(
|
||||
// type-promoted).
|
||||
if (Cur != Start) {
|
||||
ReduxDesc =
|
||||
isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
|
||||
isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
|
||||
ExactFPMathInst = ExactFPMathInst == nullptr
|
||||
? ReduxDesc.getExactFPMathInst()
|
||||
: ExactFPMathInst;
|
||||
@ -658,6 +660,95 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
|
||||
: RecurKind::FAnyOf);
|
||||
}
|
||||
|
||||
// We are looking for loops that do something like this:
|
||||
// int r = 0;
|
||||
// for (int i = 0; i < n; i++) {
|
||||
// if (src[i] > 3)
|
||||
// r = i;
|
||||
// }
|
||||
// The reduction value (r) is derived from either the values of an increasing
|
||||
// induction variable (i) sequence, or from the start value (0).
|
||||
// The LLVM IR generated for such loops would be as follows:
|
||||
// for.body:
|
||||
// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
|
||||
// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||
// ...
|
||||
// %cmp = icmp sgt i32 %5, 3
|
||||
// %spec.select = select i1 %cmp, i32 %i, i32 %r
|
||||
// %inc = add nsw i32 %i, 1
|
||||
// ...
|
||||
// Since 'i' is an increasing induction variable, the reduction value after the
|
||||
// loop will be the largest 'i' for which the condition (src[i] > 3) is
|
||||
// satisfied, or the start value (0 in the example above). When the start value
|
||||
// of the increasing induction variable 'i' is greater than the minimum value of
|
||||
// the data type, we can use the minimum value of the data type as a sentinel
|
||||
// value to replace the start value. This allows us to perform a single
|
||||
// reduction max operation to obtain the final reduction result.
|
||||
// TODO: It is possible to solve the case where the start value is the minimum
|
||||
// value of the data type or a non-constant value by using mask and multiple
|
||||
// reduction operations.
|
||||
RecurrenceDescriptor::InstDesc
|
||||
RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
|
||||
Instruction *I, ScalarEvolution &SE) {
|
||||
// TODO: Support the vectorization of FindLastIV when the reduction phi is
|
||||
// used by more than one select instruction. This vectorization is only
|
||||
// performed when the SCEV of each increasing induction variable used by the
|
||||
// select instructions is identical.
|
||||
if (!OrigPhi->hasOneUse())
|
||||
return InstDesc(false, I);
|
||||
|
||||
// TODO: Match selects with multi-use cmp conditions.
|
||||
Value *NonRdxPhi = nullptr;
|
||||
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
|
||||
m_Specific(OrigPhi)),
|
||||
m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
|
||||
m_Value(NonRdxPhi)))))
|
||||
return InstDesc(false, I);
|
||||
|
||||
auto IsIncreasingLoopInduction = [&](Value *V) {
|
||||
Type *Ty = V->getType();
|
||||
if (!SE.isSCEVable(Ty))
|
||||
return false;
|
||||
|
||||
auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
|
||||
if (!AR || AR->getLoop() != TheLoop)
|
||||
return false;
|
||||
|
||||
const SCEV *Step = AR->getStepRecurrence(SE);
|
||||
if (!SE.isKnownPositive(Step))
|
||||
return false;
|
||||
|
||||
const ConstantRange IVRange = SE.getSignedRange(AR);
|
||||
unsigned NumBits = Ty->getIntegerBitWidth();
|
||||
// Keep the minimum value of the recurrence type as the sentinel value.
|
||||
// The maximum acceptable range for the increasing induction variable,
|
||||
// called the valid range, will be defined as
|
||||
// [<sentinel value> + 1, <sentinel value>)
|
||||
// where <sentinel value> is SignedMin(<recurrence type>)
|
||||
// TODO: This range restriction can be lifted by adding an additional
|
||||
// virtual OR reduction.
|
||||
const APInt Sentinel = APInt::getSignedMinValue(NumBits);
|
||||
const ConstantRange ValidRange =
|
||||
ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
|
||||
LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
|
||||
<< ", and the signed range of " << *AR << " is "
|
||||
<< IVRange << "\n");
|
||||
// Ensure the induction variable does not wrap around by verifying that its
|
||||
// range is fully contained within the valid range.
|
||||
return ValidRange.contains(IVRange);
|
||||
};
|
||||
|
||||
// We are looking for selects of the form:
|
||||
// select(cmp(), phi, increasing_loop_induction) or
|
||||
// select(cmp(), increasing_loop_induction, phi)
|
||||
// TODO: Support for monotonically decreasing induction variable
|
||||
if (!IsIncreasingLoopInduction(NonRdxPhi))
|
||||
return InstDesc(false, I);
|
||||
|
||||
return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IFindLastIV
|
||||
: RecurKind::FFindLastIV);
|
||||
}
|
||||
|
||||
RecurrenceDescriptor::InstDesc
|
||||
RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
|
||||
const InstDesc &Prev) {
|
||||
@ -756,10 +847,9 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
|
||||
return InstDesc(true, SI);
|
||||
}
|
||||
|
||||
RecurrenceDescriptor::InstDesc
|
||||
RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
|
||||
Instruction *I, RecurKind Kind,
|
||||
InstDesc &Prev, FastMathFlags FuncFMF) {
|
||||
RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
|
||||
Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
|
||||
FastMathFlags FuncFMF, ScalarEvolution *SE) {
|
||||
assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
|
||||
switch (I->getOpcode()) {
|
||||
default:
|
||||
@ -789,6 +879,8 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
|
||||
if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
|
||||
Kind == RecurKind::Add || Kind == RecurKind::Mul)
|
||||
return isConditionalRdxPattern(Kind, I);
|
||||
if (isFindLastIVRecurrenceKind(Kind) && SE)
|
||||
return isFindLastIVPattern(L, OrigPhi, I, *SE);
|
||||
[[fallthrough]];
|
||||
case Instruction::FCmp:
|
||||
case Instruction::ICmp:
|
||||
@ -893,6 +985,15 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
|
||||
<< *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RecurKind::IFindLastIV, TheLoop, FMF, RedDes, DB, AC,
|
||||
DT, SE)) {
|
||||
LLVM_DEBUG(dbgs() << "Found a "
|
||||
<< (RedDes.getRecurrenceKind() == RecurKind::FFindLastIV
|
||||
? "F"
|
||||
: "I")
|
||||
<< "FindLastIV reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
|
||||
SE)) {
|
||||
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
|
||||
@ -1048,12 +1149,14 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
|
||||
case RecurKind::UMax:
|
||||
case RecurKind::UMin:
|
||||
case RecurKind::IAnyOf:
|
||||
case RecurKind::IFindLastIV:
|
||||
return Instruction::ICmp;
|
||||
case RecurKind::FMax:
|
||||
case RecurKind::FMin:
|
||||
case RecurKind::FMaximum:
|
||||
case RecurKind::FMinimum:
|
||||
case RecurKind::FAnyOf:
|
||||
case RecurKind::FFindLastIV:
|
||||
return Instruction::FCmp;
|
||||
default:
|
||||
llvm_unreachable("Unknown recurrence operation");
|
||||
|
@ -1208,6 +1208,23 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
|
||||
return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
|
||||
}
|
||||
|
||||
Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
|
||||
const RecurrenceDescriptor &Desc) {
|
||||
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
|
||||
Desc.getRecurrenceKind()) &&
|
||||
"Unexpected reduction kind");
|
||||
Value *StartVal = Desc.getRecurrenceStartValue();
|
||||
Value *Sentinel = Desc.getSentinelValue();
|
||||
Value *MaxRdx = Src->getType()->isVectorTy()
|
||||
? Builder.CreateIntMaxReduce(Src, true)
|
||||
: Src;
|
||||
// Correct the final reduction result back to the start value if the maximum
|
||||
// reduction is sentinel value.
|
||||
Value *Cmp =
|
||||
Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
|
||||
return Builder.CreateSelect(Cmp, MaxRdx, StartVal, "rdx.select");
|
||||
}
|
||||
|
||||
Value *llvm::getReductionIdentity(Intrinsic::ID RdxID, Type *Ty,
|
||||
FastMathFlags Flags) {
|
||||
bool Negative = false;
|
||||
@ -1315,6 +1332,8 @@ Value *llvm::createReduction(IRBuilderBase &B,
|
||||
RecurKind RK = Desc.getRecurrenceKind();
|
||||
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
|
||||
return createAnyOfReduction(B, Src, Desc, OrigPhi);
|
||||
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
|
||||
return createFindLastIVReduction(B, Src, Desc);
|
||||
|
||||
return createSimpleReduction(B, Src, RK);
|
||||
}
|
||||
|
@ -5185,8 +5185,9 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
|
||||
HasReductions &&
|
||||
any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
|
||||
const RecurrenceDescriptor &RdxDesc = Reduction.second;
|
||||
return RecurrenceDescriptor::isAnyOfRecurrenceKind(
|
||||
RdxDesc.getRecurrenceKind());
|
||||
RecurKind RK = RdxDesc.getRecurrenceKind();
|
||||
return RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
|
||||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK);
|
||||
});
|
||||
if (HasSelectCmpReductions) {
|
||||
LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
|
||||
@ -9449,8 +9450,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
|
||||
|
||||
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
|
||||
RecurKind Kind = RdxDesc.getRecurrenceKind();
|
||||
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
|
||||
"AnyOf reductions are not allowed for in-loop reductions");
|
||||
assert(
|
||||
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
|
||||
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
|
||||
"AnyOf and FindLast reductions are not allowed for in-loop reductions");
|
||||
|
||||
// Collect the chain of "link" recipes for the reduction starting at PhiR.
|
||||
SetVector<VPSingleDefRecipe *> Worklist;
|
||||
|
@ -20451,6 +20451,8 @@ private:
|
||||
case RecurKind::FMulAdd:
|
||||
case RecurKind::IAnyOf:
|
||||
case RecurKind::FAnyOf:
|
||||
case RecurKind::IFindLastIV:
|
||||
case RecurKind::FFindLastIV:
|
||||
case RecurKind::None:
|
||||
llvm_unreachable("Unexpected reduction kind for repeated scalar.");
|
||||
}
|
||||
@ -20548,6 +20550,8 @@ private:
|
||||
case RecurKind::FMulAdd:
|
||||
case RecurKind::IAnyOf:
|
||||
case RecurKind::FAnyOf:
|
||||
case RecurKind::IFindLastIV:
|
||||
case RecurKind::FFindLastIV:
|
||||
case RecurKind::None:
|
||||
llvm_unreachable("Unexpected reduction kind for reused scalars.");
|
||||
}
|
||||
|
@ -567,6 +567,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
||||
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
|
||||
ReducedPartRdx = Builder.CreateBinOp(
|
||||
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
|
||||
else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
|
||||
ReducedPartRdx =
|
||||
createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
|
||||
else
|
||||
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
|
||||
}
|
||||
@ -575,7 +578,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
||||
// Create the reduction after the loop. Note that inloop reductions create
|
||||
// the target reduction in the loop using a Reduction recipe.
|
||||
if ((State.VF.isVector() ||
|
||||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
|
||||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
|
||||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
|
||||
!PhiR->isInLoop()) {
|
||||
ReducedPartRdx =
|
||||
createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
|
||||
@ -3398,6 +3402,20 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
|
||||
Builder.SetInsertPoint(VectorPH->getTerminator());
|
||||
StartV = Iden = State.get(StartVPV);
|
||||
}
|
||||
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
|
||||
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
|
||||
// phi. In the exit block, ComputeReductionResult will generate checks to
|
||||
// verify if the reduction result is the sentinel value. If the result is
|
||||
// the sentinel value, it will be corrected back to the start value.
|
||||
// TODO: The sentinel value is not always necessary. When the start value is
|
||||
// a constant, and smaller than the start value of the induction variable,
|
||||
// the start value can be directly used to initialize the reduction phi.
|
||||
StartV = Iden = RdxDesc.getSentinelValue();
|
||||
if (!ScalarPHI) {
|
||||
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
|
||||
Builder.SetInsertPoint(VectorPH->getTerminator());
|
||||
StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
|
||||
}
|
||||
} else {
|
||||
Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
|
||||
RdxDesc.getFastMathFlags());
|
||||
|
87
llvm/test/Transforms/LoopVectorize/iv-select-cmp-blend.ll
Normal file
87
llvm/test/Transforms/LoopVectorize/iv-select-cmp-blend.ll
Normal file
@ -0,0 +1,87 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
|
||||
|
||||
define i32 @select_icmp_switch(i32 %n, i32 %case, ptr %a, ptr %b) {
|
||||
; CHECK-LABEL: define i32 @select_icmp_switch(
|
||||
; CHECK-SAME: i32 [[N:%.*]], i32 [[CASE:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
|
||||
; CHECK: [[FOR_BODY_PREHEADER]]:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
||||
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
||||
; CHECK: [[FOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDVARS:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_INC:.*]] ]
|
||||
; CHECK-NEXT: [[RDX_PHI:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[RDX_PHI_NEXT:%.*]], %[[FOR_INC]] ]
|
||||
; CHECK-NEXT: switch i32 [[CASE]], label %[[SW_BB0:.*]] [
|
||||
; CHECK-NEXT: i32 0, label %[[SW_BB0]]
|
||||
; CHECK-NEXT: i32 1, label %[[SW_BB1:.*]]
|
||||
; CHECK-NEXT: ]
|
||||
; CHECK: [[SW_BB0]]:
|
||||
; CHECK-NEXT: [[A_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS]]
|
||||
; CHECK-NEXT: [[A_VALUE:%.*]] = load i8, ptr [[A_ARRAYIDX]], align 1
|
||||
; CHECK-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A_VALUE]], -1
|
||||
; CHECK-NEXT: [[TRUNC_BB0:%.*]] = trunc i64 [[INDVARS]] to i32
|
||||
; CHECK-NEXT: [[SELECT_BB0:%.*]] = select i1 [[CMP_A]], i32 [[RDX_PHI]], i32 [[TRUNC_BB0]]
|
||||
; CHECK-NEXT: br label %[[FOR_INC]]
|
||||
; CHECK: [[SW_BB1]]:
|
||||
; CHECK-NEXT: [[B_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDVARS]]
|
||||
; CHECK-NEXT: [[B_VALUE:%.*]] = load i8, ptr [[B_ARRAYIDX]], align 1
|
||||
; CHECK-NEXT: [[CMP_B:%.*]] = icmp eq i8 [[B_VALUE]], -1
|
||||
; CHECK-NEXT: [[TRUNC_BB1:%.*]] = trunc i64 [[INDVARS]] to i32
|
||||
; CHECK-NEXT: [[SELECT_BB1:%.*]] = select i1 [[CMP_B]], i32 [[RDX_PHI]], i32 [[TRUNC_BB1]]
|
||||
; CHECK-NEXT: br label %[[FOR_INC]]
|
||||
; CHECK: [[FOR_INC]]:
|
||||
; CHECK-NEXT: [[RDX_PHI_NEXT]] = phi i32 [ [[SELECT_BB0]], %[[SW_BB0]] ], [ [[SELECT_BB1]], %[[SW_BB1]] ]
|
||||
; CHECK-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
|
||||
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
|
||||
; CHECK: [[FOR_END_LOOPEXIT]]:
|
||||
; CHECK-NEXT: [[RDX_PHI_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_PHI_NEXT]], %[[FOR_INC]] ]
|
||||
; CHECK-NEXT: br label %[[FOR_END]]
|
||||
; CHECK: [[FOR_END]]:
|
||||
; CHECK-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_PHI_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
||||
; CHECK-NEXT: ret i32 [[SELECT_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
%cmp.sgt = icmp sgt i32 %n, 0
|
||||
br i1 %cmp.sgt, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader:
|
||||
%wide.trip.count = zext i32 %n to i64
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars = phi i64 [ 0, %for.body.preheader ], [ %indvars.next, %for.inc ]
|
||||
%rdx.phi = phi i32 [ 0, %for.body.preheader ], [ %rdx.phi.next, %for.inc ]
|
||||
switch i32 %case, label %sw.bb0 [
|
||||
i32 0, label %sw.bb0
|
||||
i32 1, label %sw.bb1
|
||||
]
|
||||
|
||||
sw.bb0:
|
||||
%a.arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars
|
||||
%a.value = load i8, ptr %a.arrayidx, align 1
|
||||
%cmp.a = icmp eq i8 %a.value, -1
|
||||
%trunc.bb0 = trunc i64 %indvars to i32
|
||||
%select.bb0 = select i1 %cmp.a, i32 %rdx.phi, i32 %trunc.bb0
|
||||
br label %for.inc
|
||||
|
||||
sw.bb1:
|
||||
%b.arrayidx = getelementptr inbounds i8, ptr %b, i64 %indvars
|
||||
%b.value = load i8, ptr %b.arrayidx, align 1
|
||||
%cmp.b = icmp eq i8 %b.value, -1
|
||||
%trunc.bb1 = trunc i64 %indvars to i32
|
||||
%select.bb1 = select i1 %cmp.b, i32 %rdx.phi, i32 %trunc.bb1
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%rdx.phi.next = phi i32 [ %select.bb0, %sw.bb0 ], [ %select.bb1, %sw.bb1 ]
|
||||
%indvars.next = add nuw nsw i64 %indvars, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.next, %wide.trip.count
|
||||
br i1 %exitcond.not, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
%select.lcssa = phi i32 [ %rdx.phi.next, %for.inc ], [ 0, %entry ]
|
||||
ret i32 %select.lcssa
|
||||
}
|
@ -5,10 +5,42 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
|
||||
; CHECK-LABEL: define i64 @select_icmp_nuw_nsw(
|
||||
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
|
||||
; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
|
||||
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
|
||||
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II]]
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
||||
; CHECK: [[SCALAR_PH]]:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
||||
; CHECK: [[FOR_BODY]]:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
|
||||
@ -17,9 +49,9 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
|
||||
; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
|
||||
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
|
||||
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
||||
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
@ -46,10 +78,42 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
|
||||
; CHECK-LABEL: define i64 @select_icmp_nsw(
|
||||
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
|
||||
; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
|
||||
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
|
||||
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II]]
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
||||
; CHECK: [[SCALAR_PH]]:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
||||
; CHECK: [[FOR_BODY]]:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[II]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
|
||||
@ -58,9 +122,9 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
|
||||
; CHECK-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
|
||||
; CHECK-NEXT: [[INC]] = add nsw i64 [[IV]], 1
|
||||
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
||||
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
@ -164,3 +228,11 @@ for.body: ; preds = %entry, %for.body
|
||||
exit: ; preds = %for.body
|
||||
ret i64 %cond
|
||||
}
|
||||
;.
|
||||
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
||||
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
||||
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
||||
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
||||
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
|
||||
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
|
||||
;.
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user