mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 02:26:05 +00:00
[LoopVectorize] Add overflow checks when tail-folding with scalable vectors
In InnerLoopVectorizer::getOrCreateVectorTripCount there is an assert that the known minimum value for the VF is a power of 2 when tail-folding is enabled. However, for scalable vectors the value of vscale may not be a power of 2, which means we have to worry about the possibility of overflow. I have solved this problem by adding preheader checks that prevent us from entering the vector body if the canonical IV would overflow, i.e. if ((IntMax - TripCount) < (VF * UF)) ... skip vector loop ... Differential Revision: https://reviews.llvm.org/D125235
This commit is contained in:
parent
ddfee07519
commit
92c645b5c1
@ -612,7 +612,7 @@ protected:
|
||||
|
||||
/// Emit a bypass check to see if the vector trip count is zero, including if
|
||||
/// it overflows.
|
||||
void emitMinimumIterationCountCheck(BasicBlock *Bypass);
|
||||
void emitIterationCountCheck(BasicBlock *Bypass);
|
||||
|
||||
/// Emit a bypass check to see if all of the SCEV assumptions we've
|
||||
/// had to make are correct. Returns the block containing the checks or
|
||||
@ -886,8 +886,7 @@ protected:
|
||||
/// Emits an iteration count bypass check once for the main loop (when \p
|
||||
/// ForEpilogue is false) and once for the epilogue loop (when \p
|
||||
/// ForEpilogue is true).
|
||||
BasicBlock *emitMinimumIterationCountCheck(BasicBlock *Bypass,
|
||||
bool ForEpilogue);
|
||||
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
|
||||
void printDebugTracesAtStart() override;
|
||||
void printDebugTracesAtEnd() override;
|
||||
};
|
||||
@ -2893,6 +2892,8 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
|
||||
// overflows: the vector induction variable will eventually wrap to zero given
|
||||
// that it starts at zero and its Step is a power of two; the loop will then
|
||||
// exit, with the last early-exit vector comparison also producing all-true.
|
||||
// For scalable vectors the VF is not guaranteed to be a power of 2, but this
|
||||
// is accounted for in emitIterationCountCheck that adds an overflow check.
|
||||
if (Cost->foldTailByMasking()) {
|
||||
assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
|
||||
"VF*UF must be a power of 2 when folding tail by masking");
|
||||
@ -2955,7 +2956,7 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
|
||||
return Builder.CreateBitOrPointerCast(CastVal, DstFVTy);
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::emitMinimumIterationCountCheck(BasicBlock *Bypass) {
|
||||
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
|
||||
Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
|
||||
// Reuse existing vector loop preheader for TC checks.
|
||||
// Note that new preheader block is generated for vector loop.
|
||||
@ -2971,10 +2972,23 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(BasicBlock *Bypass) {
|
||||
: ICmpInst::ICMP_ULT;
|
||||
|
||||
// If tail is to be folded, vector loop takes care of all iterations.
|
||||
Type *CountTy = Count->getType();
|
||||
Value *CheckMinIters = Builder.getFalse();
|
||||
if (!Cost->foldTailByMasking()) {
|
||||
Value *Step = createStepForVF(Builder, Count->getType(), VF, UF);
|
||||
Value *Step = createStepForVF(Builder, CountTy, VF, UF);
|
||||
if (!Cost->foldTailByMasking())
|
||||
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
|
||||
else if (VF.isScalable()) {
|
||||
// vscale is not necessarily a power-of-2, which means we cannot guarantee
|
||||
// an overflow to zero when updating induction variables and so an
|
||||
// additional overflow check is required before entering the vector loop.
|
||||
|
||||
// Get the maximum unsigned value for the type.
|
||||
Value *MaxUIntTripCount =
|
||||
ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
|
||||
Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
|
||||
|
||||
// Don't execute the vector loop if (UMax - n) < (VF * UF).
|
||||
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
|
||||
}
|
||||
// Create new preheader for vector loop.
|
||||
LoopVectorPreHeader =
|
||||
@ -3281,7 +3295,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
|
||||
// backedge-taken count is uint##_max: adding one to it will overflow leading
|
||||
// to an incorrect trip count of zero. In this (rare) case we will also jump
|
||||
// to the scalar loop.
|
||||
emitMinimumIterationCountCheck(LoopScalarPreHeader);
|
||||
emitIterationCountCheck(LoopScalarPreHeader);
|
||||
|
||||
// Generate the code to check any assumptions that we've made for SCEV
|
||||
// expressions.
|
||||
@ -7758,7 +7772,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
|
||||
// Generate the code to check the minimum iteration count of the vector
|
||||
// epilogue (see below).
|
||||
EPI.EpilogueIterationCountCheck =
|
||||
emitMinimumIterationCountCheck(LoopScalarPreHeader, true);
|
||||
emitIterationCountCheck(LoopScalarPreHeader, true);
|
||||
EPI.EpilogueIterationCountCheck->setName("iter.check");
|
||||
|
||||
// Generate the code to check any assumptions that we've made for SCEV
|
||||
@ -7777,7 +7791,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
|
||||
// trip count. Note: the branch will get updated later on when we vectorize
|
||||
// the epilogue.
|
||||
EPI.MainLoopIterationCountCheck =
|
||||
emitMinimumIterationCountCheck(LoopScalarPreHeader, false);
|
||||
emitIterationCountCheck(LoopScalarPreHeader, false);
|
||||
|
||||
// Generate the induction variable.
|
||||
EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
|
||||
@ -7808,8 +7822,8 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
|
||||
}
|
||||
|
||||
BasicBlock *
|
||||
EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(BasicBlock *Bypass,
|
||||
bool ForEpilogue) {
|
||||
EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
|
||||
bool ForEpilogue) {
|
||||
assert(Bypass && "Expected valid bypass basic block.");
|
||||
ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF;
|
||||
unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF;
|
||||
|
@ -10,34 +10,38 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
|
||||
; CHECK-LABEL: @simple_memset(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT6]], <vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label %middle.block, label %vector.body
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
|
@ -8,15 +8,19 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
|
||||
; CHECK-LABEL: @simple_memset(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
@ -29,53 +33,53 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX1]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 12
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 1
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP5]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP10]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP15]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP20]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, i32* [[TMP21]], i32 0
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP26]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[TMP27]], 4
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, i32* [[TMP21]], i32 [[TMP28]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX1]], [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 12
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], 0
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 1
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX1]], [[TMP23]]
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP14]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP19]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP24]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, i32* [[TMP25]], i32 0
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT6]], <vscale x 4 x i32>* [[TMP30]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK2]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP30]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], 8
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP21]], i32 [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], 4
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP25]], i32 [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT8]], <vscale x 4 x i32>* [[TMP34]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK3]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT6]], <vscale x 4 x i32>* [[TMP34]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK2]])
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], 12
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i32, i32* [[TMP21]], i32 [[TMP36]]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], 8
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i32, i32* [[TMP25]], i32 [[TMP36]]
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = bitcast i32* [[TMP37]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT10]], <vscale x 4 x i32>* [[TMP38]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK4]])
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 16
|
||||
; CHECK-NEXT: [[INDEX_NEXT11]] = add i64 [[INDEX1]], [[TMP40]]
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT8]], <vscale x 4 x i32>* [[TMP38]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK3]])
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], 12
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, i32* [[TMP25]], i32 [[TMP40]]
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT10]], <vscale x 4 x i32>* [[TMP42]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK4]])
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = mul i64 [[TMP43]], 16
|
||||
; CHECK-NEXT: [[INDEX_NEXT11]] = add i64 [[INDEX1]], [[TMP44]]
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
@ -98,15 +102,19 @@ define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias
|
||||
; CHECK-LABEL: @cond_memset(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
@ -119,83 +127,83 @@ define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX1]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 12
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 1
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP5]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP10]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP15]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP20]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[COND_PTR:%.*]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, i32* [[TMP21]], i32 0
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP26]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[TMP27]], 4
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, i32* [[TMP21]], i32 [[TMP28]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX1]], [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 12
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], 0
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 1
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX1]], [[TMP23]]
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP14]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP19]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP24]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, i32* [[COND_PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, i32* [[TMP25]], i32 0
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP30]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP30]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], 8
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP21]], i32 [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], 4
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP25]], i32 [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP34]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP34]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], 12
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i32, i32* [[TMP21]], i32 [[TMP36]]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], 8
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i32, i32* [[TMP25]], i32 [[TMP36]]
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = bitcast i32* [[TMP37]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP38]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK4]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD5]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD6]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD7]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP39]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 4 x i1> [[TMP40]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 4 x i1> [[TMP41]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK4]], <vscale x 4 x i1> [[TMP42]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, i32* [[TMP43]], i32 0
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP52]], i32 4, <vscale x 4 x i1> [[TMP47]])
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = mul i32 [[TMP53]], 4
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i32, i32* [[TMP43]], i32 [[TMP54]]
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP38]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], 12
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, i32* [[TMP25]], i32 [[TMP40]]
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP42]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK4]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD5]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD6]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD7]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP24]]
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP43]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 4 x i1> [[TMP44]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 4 x i1> [[TMP45]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK4]], <vscale x 4 x i1> [[TMP46]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i32, i32* [[TMP47]], i32 0
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = bitcast i32* [[TMP55]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT9]], <vscale x 4 x i32>* [[TMP56]], i32 4, <vscale x 4 x i1> [[TMP48]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP56]], i32 4, <vscale x 4 x i1> [[TMP51]])
|
||||
; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP58:%.*]] = mul i32 [[TMP57]], 8
|
||||
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i32, i32* [[TMP43]], i32 [[TMP58]]
|
||||
; CHECK-NEXT: [[TMP58:%.*]] = mul i32 [[TMP57]], 4
|
||||
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i32, i32* [[TMP47]], i32 [[TMP58]]
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = bitcast i32* [[TMP59]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT11]], <vscale x 4 x i32>* [[TMP60]], i32 4, <vscale x 4 x i1> [[TMP49]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT9]], <vscale x 4 x i32>* [[TMP60]], i32 4, <vscale x 4 x i1> [[TMP52]])
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 12
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr i32, i32* [[TMP43]], i32 [[TMP62]]
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 8
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr i32, i32* [[TMP47]], i32 [[TMP62]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT13]], <vscale x 4 x i32>* [[TMP64]], i32 4, <vscale x 4 x i1> [[TMP50]])
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP65]], 16
|
||||
; CHECK-NEXT: [[INDEX_NEXT14]] = add i64 [[INDEX1]], [[TMP66]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT11]], <vscale x 4 x i32>* [[TMP64]], i32 4, <vscale x 4 x i1> [[TMP53]])
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = mul i32 [[TMP65]], 12
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr i32, i32* [[TMP47]], i32 [[TMP66]]
|
||||
; CHECK-NEXT: [[TMP68:%.*]] = bitcast i32* [[TMP67]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT13]], <vscale x 4 x i32>* [[TMP68]], i32 4, <vscale x 4 x i1> [[TMP54]])
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 16
|
||||
; CHECK-NEXT: [[INDEX_NEXT14]] = add i64 [[INDEX1]], [[TMP70]]
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP71]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
|
@ -10,34 +10,38 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
|
||||
; CHECK-LABEL: @simple_memset(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT6]], <vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label %middle.block, label %vector.body
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -59,26 +63,30 @@ define void @simple_memcpy(i32* noalias %dst, i32* noalias %src, i64 %n) #0 {
|
||||
; CHECK-LABEL: @simple_memcpy(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP13]], i32 0
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32>* [[TMP15]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
@ -86,9 +94,9 @@ define void @simple_memcpy(i32* noalias %dst, i32* noalias %src, i64 %n) #0 {
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label %middle.block, label %vector.body, !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -115,49 +123,53 @@ define void @copy_stride4(i32* noalias %dst, i32* noalias %src, i64 %n) #0 {
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP7]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 -1, [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]]
|
||||
; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP11]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 4, [[TMP12]]
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[TMP12]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 4, [[TMP16]]
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i32 0
|
||||
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX1]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
|
||||
; CHECK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT4]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IV]], i32 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP16]], i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[SRC:%.*]], <vscale x 4 x i64> [[VEC_IND]]
|
||||
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP17]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> undef)
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[DST:%.*]], <vscale x 4 x i64> [[VEC_IND]]
|
||||
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32*> [[TMP18]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP20]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX1]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP18]]
|
||||
; CHECK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IV]], i32 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP20]], i64 [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[SRC:%.*]], <vscale x 4 x i64> [[VEC_IND]]
|
||||
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP21]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> undef)
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[DST:%.*]], <vscale x 4 x i64> [[VEC_IND]]
|
||||
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32*> [[TMP22]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP24]]
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP21]], label %middle.block, label %vector.body
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -181,22 +193,26 @@ define void @simple_gather_scatter(i32* noalias %dst, i32* noalias %src, i32* no
|
||||
; CHECK-LABEL: @simple_gather_scatter(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[IND:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[IND:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
@ -208,9 +224,9 @@ define void @simple_gather_scatter(i32* noalias %dst, i32* noalias %src, i32* no
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label %middle.block, label %vector.body
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -237,35 +253,39 @@ while.end.loopexit: ; preds = %while.body
|
||||
define void @uniform_load(i32* noalias %dst, i32* noalias readonly %src, i64 %n) #0 {
|
||||
; CHECK-LABEL: @uniform_load(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[N:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 %n)
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[N]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[SRC:%.*]], align 4
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x i32>* [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32>* [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %for.end, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
|
||||
entry:
|
||||
@ -292,34 +312,38 @@ for.end: ; preds = %for.body, %entry
|
||||
define void @cond_uniform_load(i32* noalias %dst, i32* noalias readonly %src, i32* noalias readonly %cond, i64 %n) #0 {
|
||||
; CHECK-LABEL: @cond_uniform_load(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[N:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[SRC:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32*> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[SRC:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32*> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 %n)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[N]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[BROADCAST_SPLAT6]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> undef)
|
||||
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> undef)
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_GATHER]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <vscale x 4 x i32>*
|
||||
@ -328,9 +352,9 @@ define void @cond_uniform_load(i32* noalias %dst, i32* noalias readonly %src, i3
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP22]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP23]], label %middle.block, label %vector.body
|
||||
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %for.end, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
|
||||
entry:
|
||||
@ -365,35 +389,39 @@ for.end: ; preds = %for.inc, %entry
|
||||
define void @uniform_store(i32* noalias %dst, i32* noalias readonly %src, i64 %n) #0 {
|
||||
; CHECK-LABEL: @uniform_store(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[N:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[DST:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32*> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32*> poison, i32* [[DST:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32*> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 %n)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[N]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32*> [[BROADCAST_SPLAT4]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32*> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label %middle.block, label %vector.body
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %for.end, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
|
||||
entry:
|
||||
@ -417,39 +445,43 @@ define void @simple_fdiv(float* noalias %dst, float* noalias %src, i64 %n) #0 {
|
||||
; CHECK-LABEL: @simple_fdiv(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[SRC:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, float* [[DST:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[SRC:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, float* [[DST:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, float* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP12]] to <vscale x 4 x float>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, float* [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <vscale x 4 x float>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP15]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fdiv <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD5]]
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP15]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fdiv <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP14]] to <vscale x 4 x float>*
|
||||
; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[TMP16]], <vscale x 4 x float>* [[TMP17]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label %middle.block, label %vector.body, !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -475,36 +507,40 @@ define i32 @add_reduction_i32(i32* %ptr, i64 %n) #0 {
|
||||
; CHECK-LABEL: @add_reduction_i32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %vector.ph ], [ [[TMP14:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 0
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP12]])
|
||||
; CHECK-NEXT: [[TMP14]] = add i32 [[TMP13]], [[VEC_PHI]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label %vector.body, !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
|
||||
; CHECK-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -527,23 +563,27 @@ define float @add_reduction_f32(float* %ptr, i64 %n) #0 {
|
||||
; CHECK-LABEL: @add_reduction_f32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT2:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[TMP14:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP8]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[PTR:%.*]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[PTR:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, float* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to <vscale x 4 x float>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
|
||||
@ -553,9 +593,9 @@ define float @add_reduction_f32(float* %ptr, i64 %n) #0 {
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label %middle.block, label %vector.body, !llvm.loop [[LOOP22:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label %while.end.loopexit, label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %while.body
|
||||
@ -577,42 +617,47 @@ while.end.loopexit: ; preds = %while.body
|
||||
define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 {
|
||||
; CHECK-LABEL: @cond_xor_reduction(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[N:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
||||
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, %vector.ph ], [ [[TMP16:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP5]], i64 [[N]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP8]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP13]], i32 4, <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD1]], <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP14]])
|
||||
; CHECK-NEXT: [[TMP16]] = xor i32 [[TMP15]], [[VEC_PHI]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[N]])
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[TMP14]], i32 0
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <vscale x 4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP17]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD1]], <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP18]])
|
||||
; CHECK-NEXT: [[TMP20]] = xor i32 [[TMP19]], [[VEC_PHI]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label %scalar.ph
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user