Mirror of https://github.com/llvm/llvm-project.git (synced 2025-04-17 23:36:40 +00:00)
[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe (#110974)
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305. Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe.
Parent: 0f4b3c409f
Commit: 266ff98cba
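The address arithmetic at the heart of the new recipe is easiest to see in isolation. For unroll part Part and runtime vector length RuntimeVF (vscale * VF.getKnownMinValue(); vscale is 1 for fixed-width VFs), VPReverseVectorPointerRecipe computes the pointer to the lowest-addressed lane of that part via two GEPs. A minimal illustrative sketch, with names invented for exposition and not taken from the patch:

#include <cstdint>

// Element offsets applied to the scalar base pointer by the two GEPs that
// VPReverseVectorPointerRecipe::execute emits (see the diff below).
struct ReverseVectorOffsets {
  int64_t NumElt;   // -Part * RuntimeVF: step back over the parts already emitted
  int64_t LastLane; // 1 - RuntimeVF: land on the lowest-addressed lane of this part
};

inline ReverseVectorOffsets reversePointerOffsets(int64_t Part, int64_t RuntimeVF) {
  return {-Part * RuntimeVF, 1 - RuntimeVF};
}

// Example: RuntimeVF = 8 yields {0, -7} for part 0 and {-8, -7} for part 1,
// matching the "sub i64 1, <vf>" and "sub i64 0, <vf>" index chains in the
// updated tests.

Passing VF as a VPValue operand, rather than re-emitting vscale * MinVF inside every recipe instance, is what lets the vscale computation be hoisted out of the vector loop and reused, which is the main codegen difference visible in the updated RISC-V tests.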
@@ -4492,6 +4492,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
   case VPDef::VPInstructionSC:
   case VPDef::VPCanonicalIVPHISC:
   case VPDef::VPVectorPointerSC:
+  case VPDef::VPReverseVectorPointerSC:
   case VPDef::VPExpandSCEVSC:
   case VPDef::VPEVLBasedIVPHISC:
   case VPDef::VPPredInstPHISC:
@@ -8278,9 +8279,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   if (Consecutive) {
     auto *GEP = dyn_cast<GetElementPtrInst>(
         Ptr->getUnderlyingValue()->stripPointerCasts());
-    auto *VectorPtr = new VPVectorPointerRecipe(
-        Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
-        I->getDebugLoc());
+    VPSingleDefRecipe *VectorPtr;
+    if (Reverse)
+      VectorPtr = new VPReverseVectorPointerRecipe(
+          Ptr, &Plan.getVF(), getLoadStoreType(I),
+          GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+    else
+      VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+                                            GEP ? GEP->isInBounds() : false,
+                                            I->getDebugLoc());
     Builder.getInsertBlock()->appendRecipe(VectorPtr);
     Ptr = VectorPtr;
   }
@@ -905,6 +905,7 @@ public:
     case VPRecipeBase::VPReplicateSC:
     case VPRecipeBase::VPScalarIVStepsSC:
     case VPRecipeBase::VPVectorPointerSC:
+    case VPRecipeBase::VPReverseVectorPointerSC:
     case VPRecipeBase::VPWidenCallSC:
     case VPRecipeBase::VPWidenCanonicalIVSC:
     case VPRecipeBase::VPWidenCastSC:
@@ -1110,6 +1111,7 @@ public:
            R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
            R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+           R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
            R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
   }
 
@@ -1910,20 +1912,64 @@ public:
 #endif
 };
 
-/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+                                     public VPUnrollPartAccessor<2> {
+  Type *IndexedTy;
+
+public:
+  VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+                               bool IsInBounds, DebugLoc DL)
+      : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+                            ArrayRef<VPValue *>({Ptr, VF}),
+                            GEPFlagsTy(IsInBounds), DL),
+        IndexedTy(IndexedTy) {}
+
+  VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+  VPValue *getVFValue() { return getOperand(1); }
+  const VPValue *getVFValue() const { return getOperand(1); }
+
+  void execute(VPTransformState &State) override;
+
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return true;
+  }
+
+  /// Returns true if the recipe only uses the first part of operand \p Op.
+  bool onlyFirstPartUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    assert(getNumOperands() <= 2 && "must have at most two operands");
+    return true;
+  }
+
+  VPReverseVectorPointerRecipe *clone() override {
+    return new VPReverseVectorPointerRecipe(
+        getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the pointers for widened memory accesses of IndexTy.
 class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
                               public VPUnrollPartAccessor<1> {
   Type *IndexedTy;
-  bool IsReverse;
 
 public:
-  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
-                        bool IsInBounds, DebugLoc DL)
+  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+                        DebugLoc DL)
       : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                             GEPFlagsTy(IsInBounds), DL),
-        IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+        IndexedTy(IndexedTy) {}
 
   VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1944,8 +1990,8 @@ public:
   }
 
   VPVectorPointerRecipe *clone() override {
-    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
-                                     isInBounds(), getDebugLoc());
+    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+                                     getDebugLoc());
   }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -263,9 +263,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
               [](const auto *R) { return R->getScalarType(); })
           .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                 VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
-                VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
-            return inferScalarType(R->getOperand(0));
-          })
+                VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+              [this](const VPRecipeBase *R) {
+                return inferScalarType(R->getOperand(0));
+              })
           .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
                 VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
                 VPWidenSelectRecipe>(
@@ -162,6 +162,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPDerivedIVSC:
   case VPPredInstPHISC:
   case VPScalarCastSC:
+  case VPReverseVectorPointerSC:
     return false;
   case VPInstructionSC:
     return mayWriteToMemory();
@@ -1971,38 +1972,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-void VPVectorPointerRecipe ::execute(VPTransformState &State) {
-  auto &Builder = State.Builder;
-  State.setDebugLocFrom(getDebugLoc());
-  unsigned CurrentPart = getUnrollPart(*this);
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+                           unsigned CurrentPart, IRBuilderBase &Builder) {
   // Use i32 for the gep index type when the value is constant,
   // or query DataLayout for a more suitable index type otherwise.
   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
-                      ? DL.getIndexType(Builder.getPtrTy(0))
-                      : Builder.getInt32Ty();
+  return IsScalable && (IsReverse || CurrentPart > 0)
+             ? DL.getIndexType(Builder.getPtrTy(0))
+             : Builder.getInt32Ty();
+}
+
+void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+  unsigned CurrentPart = getUnrollPart(*this);
+  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
+                                CurrentPart, Builder);
+
+  // The wide store needs to start at the last vector element.
+  Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+  if (IndexTy != RunTimeVF->getType())
+    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+  // NumElt = -CurrentPart * RunTimeVF
+  Value *NumElt = Builder.CreateMul(
+      ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+  // LastLane = 1 - RunTimeVF
+  Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+  Value *Ptr = State.get(getOperand(0), VPLane(0));
+  bool InBounds = isInBounds();
+  Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
+
+  State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+                                         VPSlotTracker &SlotTracker) const {
+  O << Indent;
+  printAsOperand(O, SlotTracker);
+  O << " = reverse-vector-pointer ";
+  if (isInBounds())
+    O << "inbounds ";
+  printOperands(O, SlotTracker);
+}
+#endif
+
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+  unsigned CurrentPart = getUnrollPart(*this);
+  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
+                                CurrentPart, Builder);
   Value *Ptr = State.get(getOperand(0), VPLane(0));
   bool InBounds = isInBounds();
-
-  Value *ResultPtr = nullptr;
-  if (IsReverse) {
-    // If the address is consecutive but reversed, then the
-    // wide store needs to start at the last vector element.
-    // RunTimeVF = VScale * VF.getKnownMinValue()
-    // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-    Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
-    // NumElt = -CurrentPart * RunTimeVF
-    Value *NumElt = Builder.CreateMul(
-        ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
-    // LastLane = 1 - RunTimeVF
-    Value *LastLane =
-        Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
-    ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
-    ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
-  } else {
-    Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
-    ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
-  }
+  Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+  Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
 
   State.set(this, ResultPtr, /*IsScalar*/ true);
 }
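A note on the getGEPIndexTy helper factored out above: a pointer-width index is only required when the GEP offset depends on the runtime VF, i.e. a scalable VF combined with either a reverse access or a non-zero unroll part; in every other case a constant i32 index suffices. A hedged, self-contained restatement of that decision in plain C++ (the real helper returns an LLVM Type* obtained from DataLayout):

// Illustrative restatement of the index-width choice; not the in-tree code.
enum class GEPIndexWidth { Int32, PointerWidth };

inline GEPIndexWidth gepIndexWidth(bool IsScalable, bool IsReverse,
                                   unsigned CurrentPart) {
  // Reverse accesses and later parts scale their offset by the runtime VF,
  // which is only a compile-time constant when the VF is fixed-width.
  return IsScalable && (IsReverse || CurrentPart > 0)
             ? GEPIndexWidth::PointerWidth
             : GEPIndexWidth::Int32;
}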
@@ -2013,8 +2039,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
   O << Indent;
   printAsOperand(O, SlotTracker);
   O << " = vector-pointer ";
-  if (IsReverse)
-    O << "(reverse) ";
 
   printOperands(O, SlotTracker);
 }
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
     // Add operand indicating the part to generate code for, to recipes still
     // requiring it.
     if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
-            VPVectorPointerRecipe>(Copy) ||
+            VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
         match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                         m_VPValue())))
       Copy->addOperand(getConstantVPV(Part));
 
-    if (isa<VPVectorPointerRecipe>(R))
+    if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
       Copy->setOperand(0, R.getOperand(0));
   }
 }
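The PartOpIndex values from the commit message follow directly from the operand lists above: the unroller appends the part number as the last operand, so it lands at index 1 for VPVectorPointerRecipe (operands: pointer, part) and at index 2 for VPReverseVectorPointerRecipe (operands: pointer, VF, part). A simplified model of how VPUnrollPartAccessor<PartOpIdx> could recover it, assuming it simply reads the trailing constant operand when present (a sketch only; getConstantPartOperand is a hypothetical helper, not an LLVM API):

// Hypothetical model: recipes that have not been unrolled yet, or the copy
// kept for part 0, lack the extra operand and are treated as part 0.
template <unsigned PartOpIdx, typename RecipeT>
unsigned getUnrollPartModel(const RecipeT &R) {
  if (R.getNumOperands() == PartOpIdx + 1)
    return R.getConstantPartOperand(PartOpIdx); // hypothetical helper
  return 0;
}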
@@ -346,6 +346,7 @@ public:
     VPScalarCastSC,
     VPScalarIVStepsSC,
     VPVectorPointerSC,
+    VPReverseVectorPointerSC,
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
     VPWidenCastSC,
@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[N]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP12]], align 8
|
||||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP18]], align 8
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]]
|
||||
; CHECK-NEXT: store <vscale x 8 x double> [[TMP19]], ptr [[TMP25]], align 8
|
||||
; CHECK-NEXT: store <vscale x 8 x double> [[TMP20]], ptr [[TMP31]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
|
||||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
|
||||
; CHECK-NEXT: store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
|
||||
; CHECK-NEXT: store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
|
||||
; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
|
||||
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
|
||||
@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 3
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 4
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[INDEX]], -1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[N]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 3
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1, [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP16]], 3
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = sub i64 0, [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP21]], align 8
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[TMP25]], 3
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP26]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP27]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = shl i64 [[TMP29]], 3
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[TMP30]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = sub i64 1, [[TMP30]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP31]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[TMP33]], i64 [[TMP32]]
|
||||
; CHECK-NEXT: store <vscale x 8 x i64> [[TMP22]], ptr [[TMP28]], align 8
|
||||
; CHECK-NEXT: store <vscale x 8 x i64> [[TMP23]], ptr [[TMP34]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[N]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP14]], align 8
|
||||
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP18]], align 8
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 0, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]]
|
||||
; CHECK-NEXT: store <vscale x 8 x i64> [[TMP19]], ptr [[TMP23]], align 8
|
||||
; CHECK-NEXT: store <vscale x 8 x i64> [[TMP20]], ptr [[TMP27]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
@ -177,8 +163,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
|
||||
; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP36]], 1
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP29]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]]
|
||||
; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1
|
||||
|
@@ -54,6 +54,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
@@ -74,11 +75,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -138,6 +139,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
@@ -158,11 +160,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -259,6 +261,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
@@ -279,11 +282,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
@@ -363,11 +367,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%13>, ir<1.000000e+00>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@ -28,34 +28,30 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
|
||||
; IF-EVL: vector.body:
|
||||
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true)
|
||||
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
|
||||
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; IF-EVL-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], -1
|
||||
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]]
|
||||
; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
|
||||
; IF-EVL-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP11]]
|
||||
; IF-EVL-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP11]]
|
||||
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP12]]
|
||||
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP13]]
|
||||
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]]
|
||||
; IF-EVL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4
|
||||
; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 0, [[TMP18]]
|
||||
; IF-EVL-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]]
|
||||
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP19]]
|
||||
; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP20]]
|
||||
; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP22]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP6]] to i64
|
||||
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1
|
||||
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
|
||||
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
|
||||
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
|
||||
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
|
||||
; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
|
||||
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]]
|
||||
; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
|
||||
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; IF-EVL: middle.block:
|
||||
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
|
||||
; IF-EVL: scalar.ph:
|
||||
@ -131,49 +127,45 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
|
||||
; IF-EVL: vector.body:
|
||||
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true)
|
||||
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
|
||||
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32
|
||||
; IF-EVL-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX3]], 0
|
||||
; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0
|
||||
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
|
||||
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; IF-EVL-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
|
||||
; IF-EVL-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
|
||||
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
|
||||
; IF-EVL-NEXT: [[TMP11:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[TMP7]], -1
|
||||
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]]
|
||||
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
|
||||
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; IF-EVL-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> zeroinitializer
|
||||
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]]
|
||||
; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
|
||||
; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 0, [[TMP19]]
|
||||
; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP19]]
|
||||
; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]]
|
||||
; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]]
|
||||
; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]]
|
||||
; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4
|
||||
; IF-EVL-NEXT: [[TMP27:%.*]] = mul i64 0, [[TMP26]]
|
||||
; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP26]]
|
||||
; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]]
|
||||
; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]]
|
||||
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP30]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP6]])
|
||||
; IF-EVL-NEXT: [[TMP31:%.*]] = zext i32 [[TMP6]] to i64
|
||||
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
|
||||
; IF-EVL-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
|
||||
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
|
||||
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP6]], -1
|
||||
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]]
|
||||
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
|
||||
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
|
||||
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
|
||||
; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
|
||||
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
|
||||
; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
|
||||
; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
|
||||
; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
|
||||
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
|
||||
; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
|
||||
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]]
|
||||
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
|
||||
; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; IF-EVL: middle.block:
|
||||
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
|
||||
; IF-EVL: scalar.ph:
|
||||
|
@@ -38,10 +38,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
-; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]]
+; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]]
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]]
 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]]
 ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]])
@@ -20,11 +20,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -93,11 +93,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i128 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add i128 [[TMP0]], -1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[TMP0]], -1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -176,11 +176,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
 ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]]
 ; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[TMP4]], -1
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[TMP4]], -1
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 -4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -1101,6 +1101,7 @@ exit:
 define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe'
 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
@@ -1115,11 +1116,11 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<%3> * ir<-1>
+; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<-1>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
 ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%ptr.iv.next>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%ptr.iv.next>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0>
 ; CHECK-NEXT: EMIT vp<[[NEG:%.+]]> = not ir<%c.1>