mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-28 06:46:07 +00:00

This caused failures such as:

    Instruction does not dominate all uses!
      %29 = insertelement <8 x i64> %28, i64 %xor6.i.5, i64 6
      %17 = shufflevector <8 x i64> %29, <8 x i64> poison, <6 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>

See the comment on https://github.com/llvm/llvm-project/pull/123360

> Previous version was reviewed here https://github.com/llvm/llvm-project/pull/123360
> It is mostly the same, adjusted after graph-to-tree transformation
>
> Patch tries to remove wide alternate operations.
> Currently SLP vectorizer emits something like this:
> ```
> %0 = add i32
> %1 = sub i32
> %2 = add i32
> %3 = sub i32
> %4 = add i32
> %5 = sub i32
> %6 = add i32
> %7 = sub i32
>
> transforms to
>
> %v1 = add <8 x i32>
> %v2 = sub <8 x i32>
> %res = shuffle %v1, %v2, <0, 9, 2, 11, 4, 13, 6, 15>
> ```
> i.e. half of the results are just unused. This leads to increased
> register pressure and potentially doubles the number of operations.
>
> Patch introduces SplitVectorize mode, where it splits the operations by
> opcodes and produces instead something like this:
> ```
> %v1 = add <4 x i32>
> %v2 = sub <4 x i32>
> %res = shuffle %v1, %v2, <0, 4, 1, 5, 2, 6, 3, 7>
> ```
> It allows to improve the performance by reducing number of ops. Also, it
> turns on some other improvements, like improved graph reordering.
>
> [...]

This reverts commit 9d37e61fc77d3d6de891c30630f1c0227522031d as well as the follow-up commit 72bb0a9a9c6fdde43e1e191f2dc0d5d2d46aff4e.
106 lines
6.0 KiB
LLVM
106 lines
6.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s

; SLP-vectorizer test for an `or` reduction over twelve scalar i32 adds.
;
; @test combines twelve `add` results with a chain of eleven `or` instructions
; and returns the final value. The add operands are the loaded value %0, the
; argument %retval.0.i.219, the constant 1, and the six phi values merged in
; %d.exit.7 (each of those phis feeds exactly two adds).
;
; The autogenerated assertions below expect the output of the opt invocation
; above to contain one <8 x i32> add and one <4 x i32> add, each reduced with
; llvm.vector.reduce.or, with the two reduction results joined by a single
; scalar `or` that is returned.

define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: ptr [[F:%.*]], i1 [[TOBOOL_I_4:%.*]], i32 [[RETVAL_0_I_219:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F]], align 4
; CHECK-NEXT: br label %[[D_EXIT_3:.*]]
; CHECK: [[IF_END_I_1:.*]]:
; CHECK-NEXT: br label %[[D_EXIT_3]]
; CHECK: [[IF_END_I_2:.*]]:
; CHECK-NEXT: br i1 false, label %[[D_EXIT_3]], label %[[D_EXIT_6:.*]]
; CHECK: [[D_EXIT_3]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[ENTRY]] ], [ poison, %[[IF_END_I_1]] ]
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 0, i32 0>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL_I_4]], label %[[D_EXIT_4:.*]], label %[[D_EXIT_6]]
; CHECK: [[D_EXIT_4]]:
; CHECK-NEXT: br label %[[D_EXIT_6]]
; CHECK: [[IF_END_I_5:.*]]:
; CHECK-NEXT: br i1 false, label %[[D_EXIT_6]], label %[[D_EXIT_7:.*]]
; CHECK: [[D_EXIT_6]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP1]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ [[TMP1]], %[[D_EXIT_4]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP2]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[D_EXIT_4]] ]
; CHECK-NEXT: br label %[[D_EXIT_7]]
; CHECK: [[D_EXIT_7]]:
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP3]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ [[TMP4]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> <i32 1, i32 poison, i32 poison, i32 1, i32 poison, i32 1, i32 1, i32 poison>, <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 poison, i32 13, i32 14, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP0]], i32 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[RETVAL_0_I_219]], i32 7
; CHECK-NEXT: [[TMP12:%.*]] = add <8 x i32> [[TMP11]], [[TMP7]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 1, i32 1>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP16]])
; CHECK-NEXT: [[OP_RDX4:%.*]] = or i32 [[TMP18]], [[TMP17]]
; CHECK-NEXT: ret i32 [[OP_RDX4]]
;
entry:
  %0 = load i32, ptr %f, align 4
  br label %d.exit.3

; No branch in this function targets %if.end.i.1; it only contributes
; incoming values to the phis in %d.exit.3.
if.end.i.1:
  br label %d.exit.3

; No branch in this function targets %if.end.i.2 either; it contributes
; incoming values to the phis in %d.exit.3 and %d.exit.6.
if.end.i.2:
  br i1 false, label %d.exit.3, label %d.exit.6

d.exit.3:
  %retval.0.i.2191 = phi i32 [ 1, %if.end.i.2 ], [ 0, %entry ], [ 0, %if.end.i.1 ]
  %retval.0.i91117 = phi i32 [ 0, %if.end.i.2 ], [ 0, %entry ], [ 1, %if.end.i.1 ]
  br i1 %tobool.i.4, label %d.exit.4, label %d.exit.6

d.exit.4:
  br label %d.exit.6

; No branch in this function targets %if.end.i.5; it contributes incoming
; values to the phis in %d.exit.6 and %d.exit.7.
if.end.i.5:
  br i1 false, label %d.exit.6, label %d.exit.7

; Six scalar phis, each merging values from four predecessors.
d.exit.6:
  %retval.0.i.549 = phi i32 [ -1, %if.end.i.5 ], [ 0, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
  %retval.0.i.3272947 = phi i32 [ 0, %if.end.i.5 ], [ 1, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
  %retval.0.i.11315253145 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i.2191, %d.exit.3 ], [ 0, %if.end.i.2 ], [ %retval.0.i.2191, %d.exit.4 ]
  %retval.0.i91117233343 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i91117, %d.exit.3 ], [ 0, %if.end.i.2 ], [ %retval.0.i91117, %d.exit.4 ]
  %retval.0.i.219213541 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i.219, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
  %retval.0.i.43739 = phi i32 [ 1, %if.end.i.5 ], [ 0, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
  br label %d.exit.7

; Forwards the six %d.exit.6 phis (or 0 when coming from %if.end.i.5), then
; computes the twelve adds and ORs them together into the return value.
d.exit.7:
  %retval.0.i.4373961 = phi i32 [ %retval.0.i.43739, %d.exit.6 ], [ 0, %if.end.i.5 ]
  %retval.0.i.21921354159 = phi i32 [ %retval.0.i.219213541, %d.exit.6 ], [ 0, %if.end.i.5 ]
  %retval.0.i9111723334357 = phi i32 [ %retval.0.i91117233343, %d.exit.6 ], [ 0, %if.end.i.5 ]
  %retval.0.i.1131525314555 = phi i32 [ %retval.0.i.11315253145, %d.exit.6 ], [ 0, %if.end.i.5 ]
  %retval.0.i.327294753 = phi i32 [ %retval.0.i.3272947, %d.exit.6 ], [ 0, %if.end.i.5 ]
  %retval.0.i.54951 = phi i32 [ %retval.0.i.549, %d.exit.6 ], [ 0, %if.end.i.5 ]
  %add.5 = add nsw i32 %retval.0.i.54951, 1
  %add.4 = add i32 %0, %retval.0.i.4373961
  %add.3 = add i32 %retval.0.i.219, %retval.0.i.327294753
  %add.2 = add i32 %retval.0.i.21921354159, 1
  %add.1 = add i32 %retval.0.i.219, %retval.0.i.1131525314555
  %add = add i32 %retval.0.i.219, %retval.0.i9111723334357
  %add1 = add nsw i32 %retval.0.i9111723334357, 1
  %1 = or i32 %add, %add1
  %2 = or i32 %add.1, %1
  %add1.1 = add nsw i32 %retval.0.i.1131525314555, 1
  %or2.1 = or i32 %2, %add1.1
  %3 = or i32 %add.2, %or2.1
  %add1.2 = add i32 %retval.0.i.219, %retval.0.i.21921354159
  %or2.2 = or i32 %3, %add1.2
  %4 = or i32 %add.3, %or2.2
  %add1.3 = add nsw i32 %retval.0.i.327294753, 1
  %or2.3 = or i32 %4, %add1.3
  %5 = or i32 %add.4, %or2.3
  %add1.4 = add nsw i32 %retval.0.i.4373961, 1
  %or2.4 = or i32 %5, %add1.4
  %6 = or i32 %add.5, %or2.4
  %add1.5 = add i32 %retval.0.i.219, %retval.0.i.54951
  %or2.5 = or i32 %6, %add1.5
  ret i32 %or2.5
}