mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-03 01:36:06 +00:00

Summary: This patch tries to vectorize loads of consecutive memory accesses, accessed in non-consecutive or jumbled way. An earlier attempt was made with patch D26905 which was reverted back due to some basic issue with representing the 'use mask' of jumbled accesses. This patch fixes the mask representation by recording the 'use mask' in the usertree entry. Change-Id: I9fe7f5045f065d84c126fa307ef6ebe0787296df Reviewers: mkuper, loladiro, Ayal, zvi, danielcdh Reviewed By: Ayal Subscribers: mgrang, dcaballe, hans, mzolotukhin Differential Revision: https://reviews.llvm.org/D36130 llvm-svn: 320548
226 lines
12 KiB
LLVM
226 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s
|
|
|
|
;void phiUsingLoads(int *restrict A, int *restrict B) {
|
|
; int tmp0, tmp1, tmp2, tmp3;
|
|
; for (int i = 0; i < 100; i++) {
|
|
; if (A[0] == 0) {
|
|
; tmp0 = A[i + 0];
|
|
; tmp1 = A[i + 1];
|
|
; tmp2 = A[i + 2];
|
|
; tmp3 = A[i + 3];
|
|
; } else if (A[25] == 0) {
|
|
; tmp0 = A[i + 0];
|
|
; tmp1 = A[i + 1];
|
|
; tmp2 = A[i + 2];
|
|
; tmp3 = A[i + 3];
|
|
; } else if (A[50] == 0) {
|
|
; tmp0 = A[i + 0];
|
|
; tmp1 = A[i + 1];
|
|
; tmp2 = A[i + 2];
|
|
; tmp3 = A[i + 3];
|
|
; } else if (A[75] == 0) {
|
|
; tmp0 = A[i + 0];
|
|
; tmp1 = A[i + 1];
|
|
; tmp2 = A[i + 3];
|
|
; tmp3 = A[i + 2];
|
|
; }
|
|
; }
|
|
; B[0] = tmp0;
|
|
; B[1] = tmp1;
|
|
; B[2] = tmp2;
|
|
; B[3] = tmp3;
|
|
;}
|
|
|
|
|
|
; Function Attrs: norecurse nounwind uwtable
|
|
define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: @phiUsingLoads(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0
|
|
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 25
|
|
; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 50
|
|
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 75
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
|
|
; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
|
|
; CHECK-NEXT: [[ARRAYIDX66:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to <4 x i32>*
|
|
; CHECK-NEXT: store <4 x i32> [[TMP27:%.*]], <4 x i32>* [[TMP1]], align 4
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP27]], [[FOR_INC]] ]
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: if.else:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4
|
|
; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP8]], 0
|
|
; CHECK-NEXT: br i1 [[CMP13]], label [[IF_THEN14:%.*]], label [[IF_ELSE27:%.*]]
|
|
; CHECK: if.then14:
|
|
; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>*
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: if.else27:
|
|
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4
|
|
; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP14]], 0
|
|
; CHECK-NEXT: br i1 [[CMP29]], label [[IF_THEN30:%.*]], label [[IF_ELSE43:%.*]]
|
|
; CHECK: if.then30:
|
|
; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>*
|
|
; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: if.else43:
|
|
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4
|
|
; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP20]], 0
|
|
; CHECK-NEXT: br i1 [[CMP45]], label [[IF_THEN46:%.*]], label [[FOR_INC]]
|
|
; CHECK: if.then46:
|
|
; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
|
|
; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]]
|
|
; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]]
|
|
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>*
|
|
; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
|
|
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: for.inc:
|
|
; CHECK-NEXT: [[TMP27]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[TMP26]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
|
|
;
|
|
entry:
|
|
%0 = load i32, i32* %A, align 4
|
|
%cmp1 = icmp eq i32 %0, 0
|
|
%arrayidx12 = getelementptr inbounds i32, i32* %A, i64 25
|
|
%arrayidx28 = getelementptr inbounds i32, i32* %A, i64 50
|
|
%arrayidx44 = getelementptr inbounds i32, i32* %A, i64 75
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.inc
|
|
store i32 %tmp0.1, i32* %B, align 4
|
|
%arrayidx64 = getelementptr inbounds i32, i32* %B, i64 1
|
|
store i32 %tmp1.1, i32* %arrayidx64, align 4
|
|
%arrayidx65 = getelementptr inbounds i32, i32* %B, i64 2
|
|
store i32 %tmp2.1, i32* %arrayidx65, align 4
|
|
%arrayidx66 = getelementptr inbounds i32, i32* %B, i64 3
|
|
store i32 %tmp3.1, i32* %arrayidx66, align 4
|
|
ret void
|
|
|
|
for.body: ; preds = %for.inc, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
|
|
%tmp3.0111 = phi i32 [ undef, %entry ], [ %tmp3.1, %for.inc ]
|
|
%tmp2.0110 = phi i32 [ undef, %entry ], [ %tmp2.1, %for.inc ]
|
|
%tmp1.0109 = phi i32 [ undef, %entry ], [ %tmp1.1, %for.inc ]
|
|
%tmp0.0108 = phi i32 [ undef, %entry ], [ %tmp0.1, %for.inc ]
|
|
br i1 %cmp1, label %if.then, label %if.else
|
|
|
|
if.then: ; preds = %for.body
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%1 = load i32, i32* %arrayidx2, align 4
|
|
%2 = add nuw nsw i64 %indvars.iv, 1
|
|
%arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %2
|
|
%3 = load i32, i32* %arrayidx5, align 4
|
|
%4 = add nuw nsw i64 %indvars.iv, 2
|
|
%arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %4
|
|
%5 = load i32, i32* %arrayidx8, align 4
|
|
%6 = add nuw nsw i64 %indvars.iv, 3
|
|
%arrayidx11 = getelementptr inbounds i32, i32* %A, i64 %6
|
|
%7 = load i32, i32* %arrayidx11, align 4
|
|
br label %for.inc
|
|
|
|
if.else: ; preds = %for.body
|
|
%8 = load i32, i32* %arrayidx12, align 4
|
|
%cmp13 = icmp eq i32 %8, 0
|
|
br i1 %cmp13, label %if.then14, label %if.else27
|
|
|
|
if.then14: ; preds = %if.else
|
|
%arrayidx17 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%9 = load i32, i32* %arrayidx17, align 4
|
|
%10 = add nuw nsw i64 %indvars.iv, 1
|
|
%arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %10
|
|
%11 = load i32, i32* %arrayidx20, align 4
|
|
%12 = add nuw nsw i64 %indvars.iv, 2
|
|
%arrayidx23 = getelementptr inbounds i32, i32* %A, i64 %12
|
|
%13 = load i32, i32* %arrayidx23, align 4
|
|
%14 = add nuw nsw i64 %indvars.iv, 3
|
|
%arrayidx26 = getelementptr inbounds i32, i32* %A, i64 %14
|
|
%15 = load i32, i32* %arrayidx26, align 4
|
|
br label %for.inc
|
|
|
|
if.else27: ; preds = %if.else
|
|
%16 = load i32, i32* %arrayidx28, align 4
|
|
%cmp29 = icmp eq i32 %16, 0
|
|
br i1 %cmp29, label %if.then30, label %if.else43
|
|
|
|
if.then30: ; preds = %if.else27
|
|
%arrayidx33 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%17 = load i32, i32* %arrayidx33, align 4
|
|
%18 = add nuw nsw i64 %indvars.iv, 1
|
|
%arrayidx36 = getelementptr inbounds i32, i32* %A, i64 %18
|
|
%19 = load i32, i32* %arrayidx36, align 4
|
|
%20 = add nuw nsw i64 %indvars.iv, 2
|
|
%arrayidx39 = getelementptr inbounds i32, i32* %A, i64 %20
|
|
%21 = load i32, i32* %arrayidx39, align 4
|
|
%22 = add nuw nsw i64 %indvars.iv, 3
|
|
%arrayidx42 = getelementptr inbounds i32, i32* %A, i64 %22
|
|
%23 = load i32, i32* %arrayidx42, align 4
|
|
br label %for.inc
|
|
|
|
if.else43: ; preds = %if.else27
|
|
%24 = load i32, i32* %arrayidx44, align 4
|
|
%cmp45 = icmp eq i32 %24, 0
|
|
br i1 %cmp45, label %if.then46, label %for.inc
|
|
|
|
if.then46: ; preds = %if.else43
|
|
%arrayidx49 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%25 = load i32, i32* %arrayidx49, align 4
|
|
%26 = add nuw nsw i64 %indvars.iv, 1
|
|
%arrayidx52 = getelementptr inbounds i32, i32* %A, i64 %26
|
|
%27 = load i32, i32* %arrayidx52, align 4
|
|
%28 = add nuw nsw i64 %indvars.iv, 3
|
|
%arrayidx55 = getelementptr inbounds i32, i32* %A, i64 %28
|
|
%29 = load i32, i32* %arrayidx55, align 4
|
|
%30 = add nuw nsw i64 %indvars.iv, 2
|
|
%arrayidx58 = getelementptr inbounds i32, i32* %A, i64 %30
|
|
%31 = load i32, i32* %arrayidx58, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %if.then, %if.then30, %if.else43, %if.then46, %if.then14
|
|
%tmp0.1 = phi i32 [ %1, %if.then ], [ %9, %if.then14 ], [ %17, %if.then30 ], [ %25, %if.then46 ], [ %tmp0.0108, %if.else43 ]
|
|
%tmp1.1 = phi i32 [ %3, %if.then ], [ %11, %if.then14 ], [ %19, %if.then30 ], [ %27, %if.then46 ], [ %tmp1.0109, %if.else43 ]
|
|
%tmp2.1 = phi i32 [ %5, %if.then ], [ %13, %if.then14 ], [ %21, %if.then30 ], [ %29, %if.then46 ], [ %tmp2.0110, %if.else43 ]
|
|
%tmp3.1 = phi i32 [ %7, %if.then ], [ %15, %if.then14 ], [ %23, %if.then30 ], [ %31, %if.then46 ], [ %tmp3.0111, %if.else43 ]
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 100
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
}
|