[VPlan] Don't convert widen recipes to VP intrinsics in EVL transform (#127180)

This is a copy of #126177, since that PR was automatically and permanently
closed after I messed up the source branch on my remote.

This patch proposes to avoid converting widening recipes to VP
intrinsics during the EVL transform.

IIUC we initially did this to avoid `vl` toggles on RISC-V. However, we
now have the RISCVVLOptimizer pass, which makes this largely redundant.

Emitting regular IR instead of VP intrinsics allows more generic
optimisations, both in the middle end and in DAGCombiner, and we generally
have better patterns in the RISC-V backend for non-VP nodes. Sticking to
regular IR instructions is likely much less work than reimplementing all of
these optimisations for VP intrinsics, and on SPEC CPU 2017 we get
noticeably better code generation.
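
A minimal sketch of the effect, distilled from the test updates below (the value
names %p, %q and %evl are hypothetical): the EVL-predicated load and store keep
their VP intrinsics, while the widened binary operation is now emitted as a plain
IR instruction.

declare <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr, <vscale x 16 x i1>, i32)
declare void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8>, ptr, <vscale x 16 x i1>, i32)

define void @widen_and_sketch(ptr %p, ptr %q, i32 %evl) {
entry:
  ; The EVL-controlled memory operations are unchanged by this patch.
  %x = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 %p, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ; Before: %r = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ; After: a regular instruction with no mask or EVL operands.
  %r = and <vscale x 16 x i8> %x, splat (i8 1)
  call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> %r, ptr align 1 %q, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret void
}

Lanes beyond %evl may be computed by the plain instruction, but they are never
observed because the store stays EVL-predicated, which is what makes dropping the
VP form of the arithmetic safe here.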
Luke Lau 2025-02-22 19:38:11 +08:00 committed by GitHub
parent cbd3801acf
commit e23ab73335
26 changed files with 616 additions and 271 deletions


@ -523,7 +523,6 @@ public:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
@ -710,7 +709,6 @@ public:
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
@ -1113,8 +1111,7 @@ public:
}
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
return R->getVPDefID() == VPRecipeBase::VPWidenSC;
}
static inline bool classof(const VPUser *U) {
@ -1139,54 +1136,6 @@ public:
#endif
};
/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
using VPRecipeWithIRFlags::transferFlags;
public:
template <typename IterT>
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
addOperand(&EVL);
}
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
transferFlags(W);
}
~VPWidenEVLRecipe() override = default;
VPWidenRecipe *clone() override final {
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
return nullptr;
}
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
/// Produce a vp-intrinsic using the opcode and operands of the recipe,
/// processing EVL elements.
void execute(VPTransformState &State) override final;
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// EVL in that recipe is always the last operand, thus any use before means
// the VPValue should be vectorized.
return getEVL() == Op;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override final;
#endif
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.


@ -251,9 +251,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
return R->getResultType();


@ -155,14 +155,6 @@ struct Recipe_match {
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
return false;
if (!(std::is_same_v<VPWidenEVLRecipe, RecipeTys> || ...) &&
isa<VPWidenEVLRecipe>(R)) {
// Don't match VPWidenEVLRecipe if it is not explicitly part of RecipeTys.
// Otherwise we might match it unexpectedly when trying to match
// VPWidenRecipe, of which VPWidenEVLRecipe is a subclass of.
return false;
}
assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
"recipe with matched opcode the expected number of operands");


@ -86,7 +86,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@ -132,7 +131,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@ -173,7 +171,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@ -1602,42 +1599,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
}
}
void VPWidenEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
// TODO: Support other opcodes
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
State.setDebugLocFrom(getDebugLoc());
assert(State.get(getOperand(0))->getType()->isVectorTy() &&
"VPWidenEVLRecipe should not be used for scalars");
VPValue *EVL = getEVL();
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
IRBuilderBase &BuilderIR = State.Builder;
VectorBuilder Builder(BuilderIR);
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
SmallVector<Value *, 4> Ops;
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
VPValue *VPOp = getOperand(I);
Ops.push_back(State.get(VPOp));
}
Builder.setMask(Mask).setEVL(EVLArg);
Value *VPInst =
Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
// Currently vp-intrinsics only accept FMF flags.
// TODO: Enable other flags when support is added.
if (isa<FPMathOperator>(VPInst))
setFlags(cast<Instruction>(VPInst));
State.set(this, VPInst);
State.addMetadata(VPInst,
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@ -1647,15 +1608,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printFlags(O);
printOperands(O, SlotTracker);
}
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = vp." << Instruction::getOpcodeName(getOpcode());
printFlags(O);
printOperands(O, SlotTracker);
}
#endif
void VPWidenCastRecipe::execute(VPTransformState &State) {


@ -1662,40 +1662,10 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
VPValue *NewMask = GetNewMask(S->getMask());
return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
})
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
unsigned Opcode = W->getOpcode();
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
})
.Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
[&](auto *CR) -> VPRecipeBase * {
Intrinsic::ID VPID;
if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR)) {
VPID =
VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
} else {
auto *CastR = cast<VPWidenCastRecipe>(CR);
VPID = VPIntrinsic::getForOpcode(CastR->getOpcode());
}
// Not all intrinsics have a corresponding VP intrinsic.
if (VPID == Intrinsic::not_intrinsic)
return nullptr;
assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic to have mask and EVL");
SmallVector<VPValue *> Ops(CR->operands());
Ops.push_back(&AllOneMask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(
VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
})
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
SmallVector<VPValue *> Ops(Sel->operands());
Ops.push_back(&EVL);


@ -351,7 +351,6 @@ public:
VPWidenStoreEVLSC,
VPWidenStoreSC,
VPWidenSC,
VPWidenEVLSC,
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,


@ -145,10 +145,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
return VerifyEVLUse(*W,
Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
.Case<VPInstruction>([&](const VPInstruction *I) {


@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
; Check that EVL tail folded loops from the loop vectorizer are able to have the
; vl of non-VP instructions reduced.
define void @evl_tail_folded(ptr %p, ptr %q) {
; CHECK-LABEL: evl_tail_folded:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: srli a3, a3, 2
; CHECK-NEXT: addi a4, a3, 1023
; CHECK-NEXT: neg a5, a3
; CHECK-NEXT: and a4, a4, a5
; CHECK-NEXT: li a5, 1024
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub a6, a5, a2
; CHECK-NEXT: slli a7, a2, 3
; CHECK-NEXT: vsetvli a6, a6, e64, m2, ta, ma
; CHECK-NEXT: add t0, a0, a7
; CHECK-NEXT: vle64.v v8, (t0)
; CHECK-NEXT: sub a4, a4, a3
; CHECK-NEXT: add a7, a1, a7
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a7)
; CHECK-NEXT: add a2, a2, a6
; CHECK-NEXT: bnez a4, .LBB0_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl i64 %0, 1
%n.rnd.up = add i64 %1, 1023
%n.mod.vf = urem i64 %n.rnd.up, %1
%n.vec = sub i64 %n.rnd.up, %n.mod.vf
%2 = tail call i64 @llvm.vscale.i64()
%3 = shl i64 %2, 1
br label %vector.body
vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
%avl = sub i64 1024, %evl.based.iv
%4 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
%5 = getelementptr i64, ptr %p, i64 %evl.based.iv
%vp.op.load = tail call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %5, <vscale x 2 x i1> splat (i1 true), i32 %4)
%6 = add <vscale x 2 x i64> %vp.op.load, splat (i64 1)
%7 = getelementptr i64, ptr %q, i64 %evl.based.iv
tail call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %6, ptr %7, <vscale x 2 x i1> splat (i1 true), i32 %4)
%8 = zext i32 %4 to i64
%index.evl.next = add i64 %evl.based.iv, %8
%index.next = add i64 %index, %3
%9 = icmp eq i64 %index.next, %n.vec
br i1 %9, label %exit, label %vector.body
exit:
ret void
}


@ -143,8 +143,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32>
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
@ -200,7 +200,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]


@ -27,10 +27,10 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = zext <vscale x 1 x i8> [[VP_OP_LOAD]] to <vscale x 1 x i16>
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 1 x i16> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[VP_OP1:%.*]] = lshr <vscale x 1 x i16> [[TMP12]], trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <vscale x 1 x i16> [[VP_OP1]] to <vscale x 1 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]


@ -44,15 +44,15 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i32>
; CHECK-NEXT: [[VP_OP:%.*]] = mul <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = ashr <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[VP_OP3:%.*]] = or <vscale x 8 x i32> [[TMP23]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP18]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> [[VP_OP]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i32> [[TMP17]] to <vscale x 8 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP24]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 8 x i32> [[VP_OP]] to <vscale x 8 x i16>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]


@ -42,7 +42,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = and <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -136,7 +136,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.or.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = or <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -230,7 +230,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.xor.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = xor <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -324,7 +324,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.shl.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -418,7 +418,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.lshr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = lshr <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -512,7 +512,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = ashr <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -606,7 +606,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = add <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -700,7 +700,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = sub <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -794,7 +794,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = mul <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 3)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -888,7 +888,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.sdiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 3)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -982,7 +982,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.udiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = udiv <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 3)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -1076,7 +1076,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.srem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 3)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -1170,7 +1170,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.urem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = urem <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 3)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@ -1267,7 +1267,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = fadd fast <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@ -1362,7 +1362,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = fsub fast <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@ -1457,7 +1457,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = fmul fast <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@ -1552,7 +1552,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = fdiv fast <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@ -1700,7 +1700,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = fneg fast <vscale x 4 x float> [[VP_OP_LOAD]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])


@ -55,7 +55,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -63,7 +63,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
@ -80,7 +80,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -170,7 +170,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -195,7 +195,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -285,7 +285,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -310,7 +310,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -400,7 +400,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -425,7 +425,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -507,7 +507,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -530,7 +530,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: store i32 [[TMP19]], ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -607,7 +607,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP17:%.*]] = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP17:%.*]] = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true)
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP17]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
@ -630,7 +630,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: store i32 [[TMP23]], ptr [[GEP3]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -708,9 +708,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP27:%.*]] = fpext <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x double>
; IF-EVL-NEXT: [[TMP28:%.*]] = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP27]])
; IF-EVL-NEXT: [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[TMP28]] to <vscale x 4 x i32>
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -735,7 +735,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -817,9 +817,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP27:%.*]] = fpext <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x double>
; IF-EVL-NEXT: [[TMP28:%.*]] = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP27]])
; IF-EVL-NEXT: [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[TMP28]] to <vscale x 4 x i32>
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -844,7 +844,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: store i32 [[CONV3]], ptr [[GEP5]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -926,7 +926,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.vp.abs.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@ -949,7 +949,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: store i32 [[COND]], ptr [[GEP9]], align 4
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
@ -1054,3 +1054,25 @@ declare i32 @llvm.cttz.i32(i32, i1 immarg)
declare i64 @llvm.lrint.i64.f64(double)
declare i64 @llvm.llrint.i64.f64(double)
declare i32 @llvm.abs.i32(i32, i1 immarg)
;.
; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
; IF-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]}
; IF-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
; IF-EVL: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]}
;.


@ -47,7 +47,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP15]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> [[VP_OP_LOAD]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP16:%.*]] = sext <vscale x 2 x i32> [[VP_OP_LOAD]] to <vscale x 2 x i64>
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP18]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
@ -147,7 +147,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP15]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META9:![0-9]+]]
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> [[VP_OP_LOAD]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext <vscale x 2 x i32> [[VP_OP_LOAD]] to <vscale x 2 x i64>
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP18]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META12:![0-9]+]], !noalias [[META9]]
@ -247,7 +247,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP15]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META16:![0-9]+]]
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP16:%.*]] = trunc <vscale x 2 x i64> [[VP_OP_LOAD]] to <vscale x 2 x i32>
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META19:![0-9]+]], !noalias [[META16]]
@ -347,7 +347,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr align 4 [[TMP15]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META23:![0-9]+]]
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> [[VP_OP_LOAD]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP16:%.*]] = fpext <vscale x 2 x float> [[VP_OP_LOAD]] to <vscale x 2 x double>
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> [[TMP16]], ptr align 8 [[TMP18]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META26:![0-9]+]], !noalias [[META23]]
@ -447,7 +447,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr align 8 [[TMP15]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META30:![0-9]+]]
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> [[VP_OP_LOAD]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP16:%.*]] = fptrunc <vscale x 2 x double> [[VP_OP_LOAD]] to <vscale x 2 x float>
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META33:![0-9]+]], !noalias [[META30]]
@ -547,7 +547,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = sitofp <vscale x 4 x i32> [[VP_OP_LOAD]] to <vscale x 4 x float>
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@ -647,7 +647,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = uitofp <vscale x 4 x i32> [[VP_OP_LOAD]] to <vscale x 4 x float>
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@ -747,7 +747,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = fptosi <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x i32>
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@ -847,7 +847,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = fptoui <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x i32>
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@ -947,7 +947,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = call <vscale x 2 x ptr> @llvm.vp.inttoptr.nxv2p0.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP18:%.*]] = inttoptr <vscale x 2 x i64> [[VP_OP_LOAD]] to <vscale x 2 x ptr>
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[TMP15]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds ptr, ptr [[TMP19]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[TMP18]], ptr align 8 [[TMP20]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])


@ -53,7 +53,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[TMP19]], [[VEC_PHI]]
; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]]
@ -302,7 +302,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[VP_OP_LOAD]]
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[TMP18]], splat (i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]]


@ -0,0 +1,407 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=IF-EVL
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=NO-VP
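; Both prefixes run the same loops below: IF-EVL folds the tail with EVL, while
; NO-VP leaves the loops scalar. With EVL tail folding, the loaded divisor is
; first passed through llvm.vp.merge with splat (i64 1) so the lanes past EVL
; divide by 1, keeping the plain IR sdiv/udiv/srem/urem safe in the tail.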
define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-LABEL: define void @test_sdiv(
; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8
; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8
; IF-EVL-NEXT: [[TMP18:%.*]] = sdiv i64 [[TMP16]], [[TMP17]]
; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8
; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @test_sdiv(
; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8
; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8
; NO-VP-NEXT: [[TMP2:%.*]] = sdiv i64 [[TMP0]], [[TMP1]]
; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8
; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;
loop.preheader:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
%a.gep = getelementptr i64, ptr %a, i64 %iv
%0 = load i64, ptr %a.gep
%b.gep = getelementptr i64, ptr %b, i64 %iv
%1 = load i64, ptr %b.gep
%2 = sdiv i64 %0, %1
%c.gep = getelementptr i64, ptr %c, i64 %iv
store i64 %2, ptr %c.gep
%iv.next = add i64 %iv, 1
%done = icmp eq i64 %iv.next, 1024
br i1 %done, label %exit, label %loop
exit:
ret void
}
define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-LABEL: define void @test_udiv(
; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = udiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8
; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8
; IF-EVL-NEXT: [[TMP18:%.*]] = udiv i64 [[TMP16]], [[TMP17]]
; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8
; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @test_udiv(
; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8
; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8
; NO-VP-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], [[TMP1]]
; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8
; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;
loop.preheader:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
%a.gep = getelementptr i64, ptr %a, i64 %iv
%0 = load i64, ptr %a.gep
%b.gep = getelementptr i64, ptr %b, i64 %iv
%1 = load i64, ptr %b.gep
%2 = udiv i64 %0, %1
%c.gep = getelementptr i64, ptr %c, i64 %iv
store i64 %2, ptr %c.gep
%iv.next = add i64 %iv, 1
%done = icmp eq i64 %iv.next, 1024
br i1 %done, label %exit, label %loop
exit:
ret void
}
define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-LABEL: define void @test_srem(
; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8
; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8
; IF-EVL-NEXT: [[TMP18:%.*]] = srem i64 [[TMP16]], [[TMP17]]
; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8
; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @test_srem(
; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8
; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8
; NO-VP-NEXT: [[TMP2:%.*]] = srem i64 [[TMP0]], [[TMP1]]
; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8
; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;
loop.preheader:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
%a.gep = getelementptr i64, ptr %a, i64 %iv
%0 = load i64, ptr %a.gep
%b.gep = getelementptr i64, ptr %b, i64 %iv
%1 = load i64, ptr %b.gep
%2 = srem i64 %0, %1
%c.gep = getelementptr i64, ptr %c, i64 %iv
store i64 %2, ptr %c.gep
%iv.next = add i64 %iv, 1
%done = icmp eq i64 %iv.next, 1024
br i1 %done, label %exit, label %loop
exit:
ret void
}
define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-LABEL: define void @test_urem(
; IF-EVL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP9]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = urem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP6]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT: br label %[[LOOP:.*]]
; IF-EVL: [[LOOP]]:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8
; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8
; IF-EVL-NEXT: [[TMP18:%.*]] = urem i64 [[TMP16]], [[TMP17]]
; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8
; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @test_urem(
; NO-VP-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_GEP]], align 8
; NO-VP-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_GEP]], align 8
; NO-VP-NEXT: [[TMP2:%.*]] = urem i64 [[TMP0]], [[TMP1]]
; NO-VP-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT: store i64 [[TMP2]], ptr [[C_GEP]], align 8
; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; NO-VP-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; NO-VP-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;
loop.preheader:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
%a.gep = getelementptr i64, ptr %a, i64 %iv
%0 = load i64, ptr %a.gep
%b.gep = getelementptr i64, ptr %b, i64 %iv
%1 = load i64, ptr %b.gep
%2 = urem i64 %0, %1
%c.gep = getelementptr i64, ptr %c, i64 %iv
store i64 %2, ptr %c.gep
%iv.next = add i64 %iv, 1
%done = icmp eq i64 %iv.next, 1024
br i1 %done, label %exit, label %loop
exit:
ret void
}
;.
; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.


@ -59,7 +59,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]


@ -39,7 +39,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[VP_OP:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], splat (i64 1)
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
@ -115,7 +115,7 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[VP_OP:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], splat (i64 1)
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
@ -187,7 +187,7 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; CHECK-NEXT: [[VP_OP:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], splat (i64 1)
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]


@ -50,7 +50,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD]], [[VP_OP_LOAD3]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]


@ -38,7 +38,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
@ -271,7 +271,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = or <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
@ -391,7 +391,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = and <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
@ -511,7 +511,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = xor <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
@ -1139,7 +1139,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call reassoc <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = fadd reassoc <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
@ -1857,7 +1857,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x float> @llvm.vp.fmuladd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VP_OP_LOAD1]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP16:%.*]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VP_OP_LOAD1]], <vscale x 4 x float> [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]


@ -39,7 +39,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD1]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw <vscale x 4 x i32> [[VP_OP_LOAD1]], [[VP_OP_LOAD]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])


@ -30,10 +30,10 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -86,10 +86,10 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -142,10 +142,10 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -198,10 +198,10 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -251,10 +251,10 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTLZ:%.+]]> = call llvm.vp.ctlz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTLZ:%.+]]> = call llvm.ctlz(ir<[[LD1]]>, ir<true>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTLZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -302,10 +302,10 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTTZ:%.+]]> = call llvm.vp.cttz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTTZ:%.+]]> = call llvm.cttz(ir<[[LD1]]>, ir<true>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTTZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -353,12 +353,12 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LRINT:%.+]]> = call llvm.vp.lrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LRINT]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LRINT:%.+]]> = call llvm.lrint(ir<[[FPEXT]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LRINT]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -408,12 +408,12 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LLRINT:%.+]]> = call llvm.vp.llrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LLRINT]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LLRINT:%.+]]> = call llvm.llrint(ir<[[FPEXT]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LLRINT]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -463,10 +463,10 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ABS:%.+]]> = call llvm.vp.abs(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ABS:%.+]]> = call llvm.abs(ir<[[LD1]]>, ir<true>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ABS]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>


@ -26,10 +26,10 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SEXT:%.+]]> = call llvm.vp.sext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[SEXT:%.+]]> = sext ir<[[LD1]]> to i64
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[SEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -79,10 +79,10 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ZEXT:%.+]]> = call llvm.vp.zext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[LD1]]> to i64
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ZEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -130,10 +130,10 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[TRUNC:%.+]]> = trunc ir<[[LD1]]> to i16
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -181,10 +181,10 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[FPEXT:%.+]]> = fpext ir<[[LD1]]> to double
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -232,10 +232,10 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTRUNC:%.+]]> = call llvm.vp.fptrunc(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTRUNC:%.+]]> = fptrunc ir<[[LD1]]> to float
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -283,10 +283,10 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SITOFP:%.+]]> = call llvm.vp.sitofp(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[SITOFP:%.+]]> = sitofp ir<[[LD1]]> to float
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[SITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -334,10 +334,10 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UITOFP:%.+]]> = call llvm.vp.uitofp(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[LD1]]> to float
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[UITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -385,10 +385,10 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTOSI:%.+]]> = call llvm.vp.fptosi(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOSI:%.+]]> = fptosi ir<[[LD1]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTOSI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -436,10 +436,10 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPTOUI:%.+]]> = call llvm.vp.fptoui(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[FPTOUI:%.+]]> = fptoui ir<[[LD1]]> to i32
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[FPTOUI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@ -487,10 +487,10 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[INTTOPTR:%.+]]> = call llvm.vp.inttoptr(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN-CAST ir<[[INTTOPTR:%.+]]> = inttoptr ir<[[LD1]]> to ptr
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[INTTOPTR]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
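For illustration only (this sketch is not part of the patch or its tests; the function name, vector width and element types are invented): after this change a widened cast such as the vp_sext case above is emitted as plain IR, while the explicit vector length is carried only by the predicated memory accesses.

declare i32 @llvm.experimental.get.vector.length.i64(i64, i32 immarg, i1 immarg)
declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)
declare void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64>, ptr, <vscale x 4 x i1>, i32)

define void @sketch_vp_sext(ptr %a, ptr %b, i64 %avl) {
entry:
  ; Explicit vector length for this (hypothetical, straight-line) loop body.
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
  ; The memory accesses stay length-predicated through their EVL operand...
  %v = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ; ...but the cast is now a plain sext rather than llvm.vp.sext(..., %evl).
  %ext = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
  call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %ext, ptr %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret void
}

Lanes at or past the EVL are never stored, so leaving the cast unpredicated should not change observable behaviour, and the plain sext remains visible to generic middle-end and DAGCombiner folds.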


@ -49,7 +49,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add ir<[[LD1]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
; IF-EVL-OUTLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
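Again purely as a sketch (the function name and types are made up for illustration): the out-of-loop reduction step above now accumulates with an ordinary add, and only the merge that preserves the partial sum in the inactive lanes remains a VP intrinsic.

declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)

define <vscale x 4 x i32> @sketch_reduction_step(<vscale x 4 x i32> %rdx.phi, <vscale x 4 x i32> %vec, i32 %evl) {
  ; The accumulation itself is plain IR now (previously llvm.vp.add(..., %evl)).
  %add = add <vscale x 4 x i32> %vec, %rdx.phi
  ; vp.merge keeps the previous partial reduction in lanes at or past the EVL.
  %rdx.next = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %add, <vscale x 4 x i32> %rdx.phi, i32 %evl)
  ret <vscale x 4 x i32> %rdx.next
}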


@ -35,7 +35,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>


@ -44,9 +44,9 @@
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add vp<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]>
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>