llvm-project/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll
Hans Wennborg e858b10917 Revert "[SLP]Reduce number of alternate instruction, where possible"
This caused failures such as:

  Instruction does not dominate all uses!
  %29 = insertelement <8 x i64> %28, i64 %xor6.i.5, i64 6
  %17 = shufflevector <8 x i64> %29, <8 x i64> poison, <6 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>

see comment on https://github.com/llvm/llvm-project/pull/123360

> Previous version was reviewed here https://github.com/llvm/llvm-project/pull/123360
> It is mostly the same, adjusted after graph-to-tree transformation
>
> Patch tries to remove wide alternate operations.
> Currently SLP vectorizer emits something like this:
> ```
> %0 = add i32
> %1 = sub i32
> %2 = add i32
> %3 = sub i32
> %4 = add i32
> %5 = sub i32
> %6 = add i32
> %7 = sub i32
>
> transforms to
>
> %v1 = add <8 x i32>
> %v2 = sub <8 x i32>
> %res = shuffle %v1, %v2, <0, 9, 2, 11, 4, 13, 6, 15>
> ```
> i.e. half of the results are just unused. This leads to increased
> register pressure and potentially doubles the number of operations.
>
> The patch introduces SplitVectorize mode, which splits the operations by
> opcode and instead produces something like this:
> ```
> %v1 = add <4 x i32>
> %v2 = sub <4 x i32>
> %res = shuffle %v1, %v2, <0, 4, 1, 5, 2, 6, 3, 7>
> ```
> It improves performance by reducing the number of ops. It also
> enables some other improvements, like improved graph reordering.
>
> [...]

This reverts commit 9d37e61fc77d3d6de891c30630f1c0227522031d as well as
the follow-up commit 72bb0a9a9c6fdde43e1e191f2dc0d5d2d46aff4e.
2025-03-11 15:04:36 +01:00

106 lines
6.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
; @test returns the bitwise OR of twelve i32 additions. Every addition takes
; one of the six scalar phis of d.exit.7 as an operand, and each of those phis
; is used by exactly two additions; the other operand is the constant 1, the
; loaded value %0, or the argument %retval.0.i.219.
;
; The blocks if.end.i.1, if.end.i.2 and if.end.i.5 have no predecessors in this
; function; they only contribute incoming values to the phis.
;
; Expected output, as recorded by the autogenerated assertions below: the
; scalar phis become <2 x i32> and <4 x i32> vector phis whose incoming values
; from those three blocks are poison, the additions become one <8 x i32> add
; and one <4 x i32> add, and the OR chain becomes two llvm.vector.reduce.or
; calls joined by a single scalar or.
define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: ptr [[F:%.*]], i1 [[TOBOOL_I_4:%.*]], i32 [[RETVAL_0_I_219:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F]], align 4
; CHECK-NEXT: br label %[[D_EXIT_3:.*]]
; CHECK: [[IF_END_I_1:.*]]:
; CHECK-NEXT: br label %[[D_EXIT_3]]
; CHECK: [[IF_END_I_2:.*]]:
; CHECK-NEXT: br i1 false, label %[[D_EXIT_3]], label %[[D_EXIT_6:.*]]
; CHECK: [[D_EXIT_3]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[ENTRY]] ], [ poison, %[[IF_END_I_1]] ]
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 0, i32 0>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL_I_4]], label %[[D_EXIT_4:.*]], label %[[D_EXIT_6]]
; CHECK: [[D_EXIT_4]]:
; CHECK-NEXT: br label %[[D_EXIT_6]]
; CHECK: [[IF_END_I_5:.*]]:
; CHECK-NEXT: br i1 false, label %[[D_EXIT_6]], label %[[D_EXIT_7:.*]]
; CHECK: [[D_EXIT_6]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP1]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ [[TMP1]], %[[D_EXIT_4]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP2]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[D_EXIT_4]] ]
; CHECK-NEXT: br label %[[D_EXIT_7]]
; CHECK: [[D_EXIT_7]]:
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP3]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ [[TMP4]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> <i32 1, i32 poison, i32 poison, i32 1, i32 poison, i32 1, i32 1, i32 poison>, <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 poison, i32 13, i32 14, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP0]], i32 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[RETVAL_0_I_219]], i32 7
; CHECK-NEXT: [[TMP12:%.*]] = add <8 x i32> [[TMP11]], [[TMP7]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 1, i32 1>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP16]])
; CHECK-NEXT: [[OP_RDX4:%.*]] = or i32 [[TMP18]], [[TMP17]]
; CHECK-NEXT: ret i32 [[OP_RDX4]]
;
; Entry: load the scalar that later feeds %add.4, then go straight to d.exit.3.
entry:
%0 = load i32, ptr %f, align 4
br label %d.exit.3
; No predecessors in this function; only supplies phi inputs to d.exit.3.
if.end.i.1:
br label %d.exit.3
; No predecessors in this function. The condition is the constant false, so
; the branch statically selects d.exit.6, yet both targets list this block as
; a phi predecessor.
if.end.i.2:
br i1 false, label %d.exit.3, label %d.exit.6
; First level of the phi chains: two phis over the constants 0 and 1.
d.exit.3:
%retval.0.i.2191 = phi i32 [ 1, %if.end.i.2 ], [ 0, %entry ], [ 0, %if.end.i.1 ]
%retval.0.i91117 = phi i32 [ 0, %if.end.i.2 ], [ 0, %entry ], [ 1, %if.end.i.1 ]
br i1 %tobool.i.4, label %d.exit.4, label %d.exit.6
d.exit.4:
br label %d.exit.6
; No predecessors in this function; the constant-false branch selects d.exit.7.
if.end.i.5:
br i1 false, label %d.exit.6, label %d.exit.7
; Second level: six phis. Two forward the d.exit.3 phis, one forwards the
; argument %retval.0.i.219 on the d.exit.3 edge, and the remaining three merge
; only constants.
d.exit.6:
%retval.0.i.549 = phi i32 [ -1, %if.end.i.5 ], [ 0, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
%retval.0.i.3272947 = phi i32 [ 0, %if.end.i.5 ], [ 1, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
%retval.0.i.11315253145 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i.2191, %d.exit.3 ], [ 0, %if.end.i.2 ], [ %retval.0.i.2191, %d.exit.4 ]
%retval.0.i91117233343 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i91117, %d.exit.3 ], [ 0, %if.end.i.2 ], [ %retval.0.i91117, %d.exit.4 ]
%retval.0.i.219213541 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i.219, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
%retval.0.i.43739 = phi i32 [ 1, %if.end.i.5 ], [ 0, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
br label %d.exit.7
; Third level: each phi forwards the matching d.exit.6 phi, or 0 on the edge
; from if.end.i.5.
d.exit.7:
%retval.0.i.4373961 = phi i32 [ %retval.0.i.43739, %d.exit.6 ], [ 0, %if.end.i.5 ]
%retval.0.i.21921354159 = phi i32 [ %retval.0.i.219213541, %d.exit.6 ], [ 0, %if.end.i.5 ]
%retval.0.i9111723334357 = phi i32 [ %retval.0.i91117233343, %d.exit.6 ], [ 0, %if.end.i.5 ]
%retval.0.i.1131525314555 = phi i32 [ %retval.0.i.11315253145, %d.exit.6 ], [ 0, %if.end.i.5 ]
%retval.0.i.327294753 = phi i32 [ %retval.0.i.3272947, %d.exit.6 ], [ 0, %if.end.i.5 ]
%retval.0.i.54951 = phi i32 [ %retval.0.i.549, %d.exit.6 ], [ 0, %if.end.i.5 ]
; Twelve additions interleaved with the OR chain that combines them. Each
; d.exit.7 phi appears once added to the constant 1 and once added to either
; %retval.0.i.219 or, for %retval.0.i.4373961 only, the loaded value %0.
%add.5 = add nsw i32 %retval.0.i.54951, 1
%add.4 = add i32 %0, %retval.0.i.4373961
%add.3 = add i32 %retval.0.i.219, %retval.0.i.327294753
%add.2 = add i32 %retval.0.i.21921354159, 1
%add.1 = add i32 %retval.0.i.219, %retval.0.i.1131525314555
%add = add i32 %retval.0.i.219, %retval.0.i9111723334357
%add1 = add nsw i32 %retval.0.i9111723334357, 1
%1 = or i32 %add, %add1
%2 = or i32 %add.1, %1
%add1.1 = add nsw i32 %retval.0.i.1131525314555, 1
%or2.1 = or i32 %2, %add1.1
%3 = or i32 %add.2, %or2.1
%add1.2 = add i32 %retval.0.i.219, %retval.0.i.21921354159
%or2.2 = or i32 %3, %add1.2
%4 = or i32 %add.3, %or2.2
%add1.3 = add nsw i32 %retval.0.i.327294753, 1
%or2.3 = or i32 %4, %add1.3
%5 = or i32 %add.4, %or2.3
%add1.4 = add nsw i32 %retval.0.i.4373961, 1
%or2.4 = or i32 %5, %add1.4
%6 = or i32 %add.5, %or2.4
%add1.5 = add i32 %retval.0.i.219, %retval.0.i.54951
%or2.5 = or i32 %6, %add1.5
; The result is the OR of all twelve additions.
ret i32 %or2.5
}