mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 20:56:07 +00:00
[InstCombine] fold shuffles of insert_subvectors
This should be a valid exception to the general rule of not creating new shuffle masks in IR... because we already do it. :) Also, DAG combining/legalization will undo this by widening the shuffle back out if needed. Explanation for how we already do this: SLP or vector source can create chains of insert/extract as shown in 1 of the examples from PR16739: https://godbolt.org/z/NlK7rA https://bugs.llvm.org/show_bug.cgi?id=16739 And we expect instcombine or DAGCombine to clean that up by creating relatively simple shuffles. Differential Revision: https://reviews.llvm.org/D62024 llvm-svn: 361338
This commit is contained in:
parent
bceadcbb0c
commit
6a554188aa
@ -912,6 +912,31 @@ A target implementation tells the legalizer which operations are not supported
|
||||
(and which of the above three actions to take) by calling the
|
||||
``setOperationAction`` method in its ``TargetLowering`` constructor.
|
||||
|
||||
If a target has legal vector types, it is expected to produce efficient machine
|
||||
code for common forms of the shufflevector IR instruction using those types.
|
||||
This may require custom legalization for SelectionDAG vector operations that
|
||||
are created from the shufflevector IR. The shufflevector forms that should be
|
||||
handled include:
|
||||
|
||||
* Vector select --- Each element of the vector is chosen from either of the
|
||||
corresponding elements of the 2 input vectors. This operation may also be
|
||||
known as a "blend" or "bitwise select" in target assembly. This type of shuffle
|
||||
maps directly to the ``shuffle_vector`` SelectionDAG node.
|
||||
|
||||
* Insert subvector --- A vector is placed into a longer vector type starting
|
||||
at index 0. This type of shuffle maps directly to the ``insert_subvector``
|
||||
SelectionDAG node with the ``index`` operand set to 0.
|
||||
|
||||
* Extract subvector --- A vector is pulled from a longer vector type starting
|
||||
at index 0. This type of shuffle maps directly to the ``extract_subvector``
|
||||
SelectionDAG node with the ``index`` operand set to 0.
|
||||
|
||||
* Splat --- All elements of the vector have identical scalar elements. This
|
||||
operation may also be known as a "broadcast" or "duplicate" in target assembly.
|
||||
The shufflevector IR instruction may change the vector length, so this operation
|
||||
may map to multiple SelectionDAG nodes including ``shuffle_vector``,
|
||||
``concat_vectors``, ``insert_subvector``, and ``extract_subvector``.
|
||||
|
||||
Prior to the existence of the Legalize passes, we required that every target
|
||||
`selector`_ supported and handled every operator and type even if they are not
|
||||
natively supported. The introduction of the Legalize phases allows all of the
|
||||
|
@ -1622,6 +1622,55 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
|
||||
// Match the operands as identity with padding (also known as concatenation
|
||||
// with undef) shuffles of the same source type. The backend is expected to
|
||||
// recreate these concatenations from a shuffle of narrow operands.
|
||||
auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
|
||||
auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
|
||||
if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
|
||||
!Shuffle1 || !Shuffle1->isIdentityWithPadding())
|
||||
return nullptr;
|
||||
|
||||
// We limit this transform to power-of-2 types because we expect that the
|
||||
// backend can convert the simplified IR patterns to identical nodes as the
|
||||
// original IR.
|
||||
// TODO: If we can verify that behavior for arbitrary types, the power-of-2
|
||||
// checks can be removed.
|
||||
Value *X = Shuffle0->getOperand(0);
|
||||
Value *Y = Shuffle1->getOperand(0);
|
||||
if (X->getType() != Y->getType() ||
|
||||
!isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
|
||||
!isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
|
||||
!isPowerOf2_32(X->getType()->getVectorNumElements()) ||
|
||||
isa<UndefValue>(X) || isa<UndefValue>(Y))
|
||||
return nullptr;
|
||||
assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
|
||||
isa<UndefValue>(Shuffle1->getOperand(1)) &&
|
||||
"Unexpected operand for identity shuffle");
|
||||
|
||||
// This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
|
||||
// operands directly by adjusting the shuffle mask to account for the narrower
|
||||
// types:
|
||||
// shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
|
||||
int NarrowElts = X->getType()->getVectorNumElements();
|
||||
int WideElts = Shuffle0->getType()->getVectorNumElements();
|
||||
assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
|
||||
|
||||
Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
|
||||
SmallVector<int, 16> Mask = Shuf.getShuffleMask();
|
||||
SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
|
||||
for (int i = 0, e = Mask.size(); i != e; ++i) {
|
||||
if (Mask[i] == -1)
|
||||
continue;
|
||||
if (Mask[i] < WideElts)
|
||||
NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
|
||||
else
|
||||
NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
|
||||
}
|
||||
return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
|
||||
}
|
||||
|
||||
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
Value *LHS = SVI.getOperand(0);
|
||||
Value *RHS = SVI.getOperand(1);
|
||||
@ -1676,10 +1725,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
if (Instruction *I = foldIdentityExtractShuffle(SVI))
|
||||
return I;
|
||||
|
||||
// This transform has the potential to lose undef knowledge, so it is
|
||||
// These transforms have the potential to lose undef knowledge, so they are
|
||||
// intentionally placed after SimplifyDemandedVectorElts().
|
||||
if (Instruction *I = foldShuffleWithInsert(SVI))
|
||||
return I;
|
||||
if (Instruction *I = foldIdentityPaddedShuffles(SVI))
|
||||
return I;
|
||||
|
||||
if (VWidth == LHSWidth) {
|
||||
// Analyze the shuffle, are the LHS or RHS and identity shuffles?
|
||||
|
@ -1140,6 +1140,8 @@ define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) {
|
||||
ret <2 x i1> %r
|
||||
}
|
||||
|
||||
; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
|
||||
|
||||
define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
|
||||
; CHECK-LABEL: @insert_subvector_shuffles(
|
||||
; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
@ -1155,9 +1157,7 @@ define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
|
||||
|
||||
define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) {
|
||||
; CHECK-LABEL: @insert_subvector_shuffles_pow2elts(
|
||||
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i8> [[S1]], <8 x i8> [[S2]], <8 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9, i32 0>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> <i32 0, i32 2, i32 1, i32 undef, i32 2, i32 1, i32 3, i32 0>
|
||||
; CHECK-NEXT: ret <8 x i8> [[S3]]
|
||||
;
|
||||
%s1 = shufflevector <2 x i8> %x, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
@ -1167,6 +1167,7 @@ define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) {
|
||||
}
|
||||
|
||||
; The last shuffle may change the vector type.
|
||||
; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
|
||||
|
||||
define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
|
||||
; CHECK-LABEL: @insert_subvector_shuffles_narrowing(
|
||||
@ -1183,9 +1184,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
|
||||
|
||||
define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x i8> %y) {
|
||||
; CHECK-LABEL: @insert_subvector_shuffles_narrowing_pow2elts(
|
||||
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i8> [[S1]], <8 x i8> [[S2]], <2 x i32> <i32 0, i32 8>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4>
|
||||
; CHECK-NEXT: ret <2 x i8> [[S3]]
|
||||
;
|
||||
%s1 = shufflevector <4 x i8> %x, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
@ -1198,9 +1197,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x
|
||||
|
||||
define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) {
|
||||
; CHECK-LABEL: @insert_subvector_shuffles_identity(
|
||||
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x double> [[S3]]
|
||||
;
|
||||
%s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
|
||||
|
Loading…
x
Reference in New Issue
Block a user