[InstCombine] canonicalize fneg after shuffle

For the unary shuffle pattern, this is opposite to what we try to do with binops, but it seems better to keep it consistent with the motivating binary shuffle pattern. On that, it is clearly better on the usual no-extra-uses case. There is a chance that this will pull an fneg away from some other binop and cause a regression in codegen, but that should be invertible in the backend. The transform is bidirectional: https://alive2.llvm.org/ce/z/kKaKCU https://alive2.llvm.org/ce/z/3Desfw Fixes #45631
2025-04-27 17:16:06 +00:00 · 2022-05-06 15:48:41 -04:00 · 2022-05-06 15:48:41 -04:00 · b331a7ebc1
commit b331a7ebc1
parent ef9d39de2f
3 changed files with 64 additions and 19 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@ -2242,6 +2242,39 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
 }

+/// Canonicalize FP negate after shuffle. Returns the new fneg, or nullptr if the pattern does not match.
+static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
+                                    InstCombiner::BuilderTy &Builder) {
+  Instruction *FNeg0; // Bound to operand 0 when it is an fneg instruction.
+  Value *X;           // The value negated by FNeg0.
+  if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
+                                              m_FNeg(m_Value(X)))))
+    return nullptr; // Operand 0 is not an fneg: nothing to do.
+
+  // Unary shuffle whose fneg has no other users: shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
+  if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+    Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
+    return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0); // Fast-math flags are taken from FNeg0.
+  }
+
+  Instruction *FNeg1; // Bound to operand 1 when it is an fneg instruction.
+  Value *Y;           // The value negated by FNeg1.
+  if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
+                                              m_FNeg(m_Value(Y)))))
+    return nullptr; // Operand 1 is not an fneg: the binary pattern does not apply.
+
+  // Binary shuffle where at least one fneg has no other users: shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
+  if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
+    Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+    Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
+    NewFNeg->copyIRFlags(FNeg0); // Start from FNeg0's flags...
+    NewFNeg->andIRFlags(FNeg1);  // ...then intersect with FNeg1's, keeping only flags common to both.
+    return NewFNeg;
+  }
+
+  return nullptr; // Both fnegs have other users: leave the shuffle as-is.
+}
+
 /// Try to fold an extract subvector operation.
 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
@ -2537,6 +2570,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  if (Instruction *I = narrowVectorSelect(SVI, Builder))
    return I;

+  if (Instruction *I = foldFNegShuffle(SVI, Builder))
+    return I;
+
  APInt UndefElts(VWidth, 0);
  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
--- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
@ -749,8 +749,8 @@ define <8 x i8> @pr19730(<16 x i8> %in0) {

 define i32 @pr19737(<4 x i32> %in0) {
 ; CHECK-LABEL: @pr19737(
-; CHECK-NEXT:    [[RV:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i64 0
-; CHECK-NEXT:    ret i32 [[RV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i64 0
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
  %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %neg.i = xor <4 x i32> %shuffle.i, <i32 -1, i32 -1, i32 -1, i32 -1>
@ -1277,8 +1277,8 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) {

 define <2 x float> @fneg(<2 x float> %x) {
 ; CHECK-LABEL: @fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = fneg <2 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@ -1278,8 +1278,8 @@ define <2 x float> @fsub_splat_constant1(<2 x float> %x) {

 define <2 x float> @fneg(<2 x float> %x) {
 ; CHECK-LABEL: @fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = fneg <2 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
  %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
@ -1791,8 +1791,8 @@ define <4 x i32> @PR46872(<4 x i32> %x) {

 define <2 x float> @fneg_unary_shuf(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_unary_shuf(
-; CHECK-NEXT:    [[NX:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = fneg nnan nsz <2 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
  %nx = fneg nsz nnan <2 x float> %x
@ -1802,8 +1802,8 @@ define <2 x float> @fneg_unary_shuf(<2 x float> %x) {

 define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) {
 ; CHECK-LABEL: @fneg_unary_shuf_widen(
-; CHECK-NEXT:    [[NX:%.*]] = fneg ninf <2 x half> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = fneg ninf <4 x half> [[TMP1]]
 ; CHECK-NEXT:    ret <4 x half> [[R]]
 ;
  %nx = fneg ninf <2 x half> %x
@ -1813,8 +1813,8 @@ define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) {

 define <2 x double> @fneg_unary_shuf_narrow(<4 x double> %x) {
 ; CHECK-LABEL: @fneg_unary_shuf_narrow(
-; CHECK-NEXT:    [[NX:%.*]] = fneg nsz <4 x double> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x double> [[NX]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = fneg nsz <2 x double> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x double> [[R]]
 ;
  %nx = fneg nsz <4 x double> %x
@ -1822,6 +1822,8 @@ define <2 x double> @fneg_unary_shuf_narrow(<4 x double> %x) {
  ret <2 x double> %r
 }

+; negative test - extra use prevents canonicalization
+
 define <2 x float> @fneg_unary_shuf_use(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_unary_shuf_use(
 ; CHECK-NEXT:    [[NX:%.*]] = fneg nsz <2 x float> [[X:%.*]]
@ -1835,11 +1837,12 @@ define <2 x float> @fneg_unary_shuf_use(<2 x float> %x) {
  ret <2 x float> %r
 }

+; intersect FMF
+
 define <4 x float> @fneg_shuf(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fneg_shuf(
-; CHECK-NEXT:    [[NX:%.*]] = fneg ninf nsz <4 x float> [[X:%.*]]
-; CHECK-NEXT:    [[NY:%.*]] = fneg nnan ninf <4 x float> [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[R:%.*]] = fneg ninf <4 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
  %nx = fneg nsz ninf <4 x float> %x
@ -1848,12 +1851,14 @@ define <4 x float> @fneg_shuf(<4 x float> %x, <4 x float> %y) {
  ret <4 x float> %r
 }

+; length-changing shuffle and extra use are ok
+
 define <4 x float> @fneg_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fneg_shuf_widen_use1(
 ; CHECK-NEXT:    [[NX:%.*]] = fneg nnan <2 x float> [[X:%.*]]
 ; CHECK-NEXT:    call void @use(<2 x float> [[NX]])
-; CHECK-NEXT:    [[NY:%.*]] = fneg nnan <2 x float> [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> [[NY]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = fneg nnan <4 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
  %nx = fneg nnan <2 x float> %x
@ -1863,12 +1868,14 @@ define <4 x float> @fneg_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
  ret <4 x float> %r
 }

+; length-changing shuffle and extra use still ok
+
 define <2 x float> @fneg_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fneg_shuf_narrow_use2(
-; CHECK-NEXT:    [[NX:%.*]] = fneg nnan nsz <4 x float> [[X:%.*]]
 ; CHECK-NEXT:    [[NY:%.*]] = fneg nnan nsz <4 x float> [[Y:%.*]]
 ; CHECK-NEXT:    call void @use4(<4 x float> [[NY]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <2 x i32> <i32 3, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y]], <2 x i32> <i32 3, i32 5>
+; CHECK-NEXT:    [[R:%.*]] = fneg nnan nsz <2 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
  %nx = fneg nsz nnan <4 x float> %x
@ -1878,6 +1885,8 @@ define <2 x float> @fneg_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
  ret <2 x float> %r
 }

+; negative test - too many extra uses
+
 define <2 x float> @fneg_shuf_use3(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fneg_shuf_use3(
 ; CHECK-NEXT:    [[NX:%.*]] = fneg nnan <2 x float> [[X:%.*]]