Revert "[VPlan] Add transformation to narrow interleave groups. (#106441)"
This reverts commit dfa665f19c52d98b8d833a8e9073427ba5641b19. The reverted commit caused miscompilations in ffmpeg; see https://github.com/llvm/llvm-project/pull/106441 for details.
parent b2b3cb5f76
commit ff3e2ba9eb
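For context on what was reverted: the narrowing transform targeted vector loops in which an interleave group of loads feeds an interleave group of stores, as in the complex-add tests updated below. The C++ sketch that follows is illustrative only and is not taken from the LLVM sources; the struct and function names are invented. It shows the kind of source loop whose pairs of adjacent fields form interleave groups. The test updates below reflect the revert: the plain <2 x double> loads and stores expected by the narrowed form give way again to <4 x double> wide loads and stores plus shufflevector de-interleaving.

// Hypothetical example (names invented): each iteration touches two adjacent
// doubles per element, so the vectorizer forms factor-2 interleave groups for
// the loads of A and B and for the store to Res.
struct DoublePair {
  double Re;
  double Im;
};

void complex_add(const DoublePair *A, const DoublePair *B, DoublePair *Res,
                 long N) {
  for (long I = 0; I != N; ++I) {
    // Consecutive fields of one element form the interleave group.
    Res[I].Re = A[I].Re + B[I].Re;
    Res[I].Im = A[I].Im + B[I].Im;
  }
}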
@@ -2236,36 +2236,6 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
    }
  }

/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe \p WideMember
/// that feeds a store interleave group at index \p Idx, \p WideMember0 is the
/// recipe feeding the same interleave group at index 0. A VPWidenLoadRecipe can
/// be narrowed to an index-independent load if it feeds all wide ops at all
/// indices (checked via the operands of the wide recipe at lane0, \p
/// WideMember0). A VPInterleaveRecipe can be narrowed to a wide load, if \p V
/// is defined at \p Idx of a load interleave group.
static bool canNarrowLoad(VPWidenRecipe *WideMember0, VPWidenRecipe *WideMember,
                          VPValue *V, unsigned Idx) {
  auto *DefR = V->getDefiningRecipe();
  if (!DefR)
    return false;
  if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
    return !W->getMask() &&
           all_of(zip(WideMember0->operands(), WideMember->operands()),
                  [V](const auto P) {
                    // V must be at the same places in both WideMember0 and
                    // WideMember.
                    const auto &[WideMember0Op, WideMemberOp] = P;
                    return (WideMember0Op == V) == (WideMemberOp == V);
                  });

  if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR))
    return IR->getInterleaveGroup()->getFactor() ==
               IR->getInterleaveGroup()->getNumMembers() &&
           IR->getVPValue(Idx) == V;
  return false;
}

/// Returns true if \p IR is a full interleave group with factor and number of
/// members both equal to \p VF. The interleave group must also access the full
/// vector width \p VectorRegWidth.
@@ -2328,8 +2298,6 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
    if (R.mayWriteToMemory() && !InterleaveR)
      return;

    // All other ops are allowed, but we reject uses that cannot be converted
    // when checking all allowed consumers (store interleave groups) below.
    if (!InterleaveR)
      continue;

@@ -2344,7 +2312,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,

    // For now, we only support full interleave groups storing load interleave
    // groups.
    if (all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) {
    if (!all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) {
          VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
          if (!DefR)
            return false;
@@ -2354,25 +2322,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
                 IR->getInterleaveGroup()->getNumMembers() &&
                 IR->getVPValue(Op.index()) == Op.value();
        })) {
      StoreGroups.push_back(InterleaveR);
      continue;
    }

    // Check if all values feeding InterleaveR are matching wide recipes, whose
    // operands can be narrowed.
    auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
        InterleaveR->getStoredValues()[0]->getDefiningRecipe());
    if (!WideMember0)
      return;
    for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
      auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe());
      if (!R || R->getOpcode() != WideMember0->getOpcode() ||
          R->getNumOperands() > 2)
        return;
      if (any_of(R->operands(), [WideMember0, Idx = I, R](VPValue *V) {
            return !canNarrowLoad(WideMember0, R, V, Idx);
          }))
        return;
    }
    StoreGroups.push_back(InterleaveR);
  }
@@ -2380,41 +2330,23 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
  if (StoreGroups.empty())
    return;

  // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
  // Convert InterleaveGroup R to a single VPWidenLoadRecipe.
  auto NarrowOp = [](VPRecipeBase *R) -> VPValue * {
    if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
      // Narrow interleave group to wide load, as transformed VPlan will only
      // process one original iteration.
      auto *L = new VPWidenLoadRecipe(
          *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
          LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
          /*Reverse=*/false, LoadGroup->getDebugLoc());
      L->insertBefore(LoadGroup);
      return L;
    }

    auto *WideLoad = cast<VPWidenLoadRecipe>(R);

    // Narrow wide load to uniform scalar load, as transformed VPlan will only
    auto *LoadGroup = cast<VPInterleaveRecipe>(R);
    // Narrow interleave group to wide load, as transformed VPlan will only
    // process one original iteration.
    auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
                                    WideLoad->operands(), /*IsUniform*/ true);
    N->insertBefore(WideLoad);
    return N;
    auto *L = new VPWidenLoadRecipe(
        *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
        /*Reverse=*/false, LoadGroup->getDebugLoc());
    L->insertBefore(LoadGroup);
    return L;
  };

  // Narrow operation tree rooted at store groups.
  for (auto *StoreGroup : StoreGroups) {
    VPValue *Res = nullptr;
    if (auto *WideMember0 = dyn_cast<VPWidenRecipe>(
            StoreGroup->getStoredValues()[0]->getDefiningRecipe())) {
      for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
        WideMember0->setOperand(
            Idx, NarrowOp(WideMember0->getOperand(Idx)->getDefiningRecipe()));
      Res = WideMember0;
    } else {
      Res = NarrowOp(StoreGroup->getStoredValues()[0]->getDefiningRecipe());
    }
    VPValue *Res =
        NarrowOp(StoreGroup->getStoredValues()[0]->getDefiningRecipe());

    auto *S = new VPWidenStoreRecipe(
        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),

@@ -99,17 +99,31 @@ define void @test_complex_add_double(ptr %res, ptr noalias %A, ptr noalias %B, i
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = load <2 x double>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <4 x double>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <4 x double> [[WIDE_VEC5]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <4 x double> [[WIDE_VEC5]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[WIDE_VEC8:%.*]] = load <4 x double>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <4 x double> [[WIDE_VEC8]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <4 x double> [[WIDE_VEC8]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[STRIDED_VEC]], [[STRIDED_VEC6]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[STRIDED_VEC3]], [[STRIDED_VEC9]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[STRIDED_VEC1]], [[STRIDED_VEC7]]
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[STRIDED_VEC4]], [[STRIDED_VEC10]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP1]]
; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP10]], align 4
; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC11]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:

@@ -19,14 +19,24 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
; VF2-NEXT: [[TMP12:%.*]] = shl nsw i64 [[TMP10]], 1
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP1]]
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP12]]
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
; VF2-NEXT: [[TMP9:%.*]] = fneg <2 x double> [[WIDE_LOAD]]
; VF2-NEXT: [[TMP11:%.*]] = fneg <2 x double> [[WIDE_LOAD1]]
; VF2-NEXT: store <2 x double> [[TMP9]], ptr [[TMP2]], align 8
; VF2-NEXT: store <2 x double> [[TMP11]], ptr [[TMP5]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP12]]
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; VF2-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP13]], align 8
; VF2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; VF2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; VF2-NEXT: [[TMP3:%.*]] = fneg <2 x double> [[STRIDED_VEC]]
; VF2-NEXT: [[TMP14:%.*]] = fneg <2 x double> [[STRIDED_VEC3]]
; VF2-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[STRIDED_VEC1]]
; VF2-NEXT: [[TMP9:%.*]] = fneg <2 x double> [[STRIDED_VEC4]]
; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
; VF2-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP13]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF2: [[MIDDLE_BLOCK]]:
@@ -190,15 +200,18 @@ define void @test_2xi64(ptr noalias %data, ptr noalias %factor) {
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
; VF2-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP7]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
; VF2-NEXT: [[TMP23:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]]
; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP23]]
; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP24]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF2: [[MIDDLE_BLOCK]]:
@@ -1001,30 +1014,28 @@ define void @test_2xi64_sub_of_wide_loads(ptr noalias %data, ptr noalias %A, ptr
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 2
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
; VF2-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT5]], <2 x i64> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
; VF2-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
; VF2-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP0]], 1
; VF2-NEXT: [[TMP20:%.*]] = shl nsw i64 [[TMP1]], 1
; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]]
; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]]
; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[DATA_0]], align 8
; VF2-NEXT: store <2 x i64> [[TMP13]], ptr [[DATA_1]], align 8
; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP14:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
; VF2-NEXT: [[TMP15:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
; VF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP16]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA_0]], align 8
; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC4]], ptr [[DATA_1]], align 8
; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 4
; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
; VF2-NEXT: br i1 [[EC]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; VF2: [[MIDDLE_BLOCK]]:

@@ -20,14 +20,23 @@ define void @test_4xi64(ptr noalias %data, ptr noalias %factor, i64 noundef %n)
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[ARRAYIDX]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 0
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP3]], align 8
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i64> [[STRIDED_VEC2]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC1]]
; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC3]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP8]], <8 x i64> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP10]], <16 x i64> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]: