Revert "[SLP]Reduce number of alternate instruction, where possible"

This caused failures such as:

  Instruction does not dominate all uses!
  %29 = insertelement <8 x i64> %28, i64 %xor6.i.5, i64 6
  %17 = shufflevector <8 x i64> %29, <8 x i64> poison, <6 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>

see comment on https://github.com/llvm/llvm-project/pull/123360

> Previous version was reviewed here https://github.com/llvm/llvm-project/pull/123360
> It is mostly the same, adjusted after graph-to-tree transformation
>
> Patch tries to remove wide alternate operations.
> Currently SLP vectorizer emits something like this:
> ```
> %0 = add i32
> %1 = sub i32
> %2 = add i32
> %3 = sub i32
> %4 = add i32
> %5 = sub i32
> %6 = add i32
> %7 = sub i32
>
> transformes to
>
> %v1 = add <8 x i32>
> %v2 = sub <8 x i32>
> %res = shuffle %v1, %v2, <0, 9, 2, 11, 4, 13, 6, 15>
> ```
> i.e. half of the results are just unused. This leads to increased
> register pressure and potentially doubles number of operations.
>
> Patch introduces SplitVectorize mode, where it splits the operations by
> opcodes and produces instead something like this:
> ```
> %v1 = add <4 x i32>
> %v2 = sub <4 x i32>
> %res = shuffle %v1, %v2, <0, 4, 1, 5, 2, 6, 3, 7>
> ```
> It allows to improve the performance by reducing number of ops. Also, it
> turns on some other improvements, like improved graph reordering.
>
> [...]

This reverts commit 9d37e61fc77d3d6de891c30630f1c0227522031d as well as
the follow-up commit 72bb0a9a9c6fdde43e1e191f2dc0d5d2d46aff4e.
This commit is contained in:
Hans Wennborg 2025-03-11 14:51:37 +01:00
parent b9d6b62721
commit e858b10917
34 changed files with 794 additions and 2744 deletions

View File

@ -1771,10 +1771,6 @@ public:
/// scalable version of the vectorized loop.
bool preferFixedOverScalableIfEqualCost() const;
/// \returns True if target prefers SLP vectorizer with altermate opcode
/// vectorization, false - otherwise.
bool preferAlternateOpcodeVectorization() const;
/// \returns True if the target prefers reductions in loop.
bool preferInLoopReduction(unsigned Opcode, Type *Ty) const;
@ -2329,7 +2325,6 @@ public:
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty) const = 0;
virtual bool preferPredicatedReductionSelect(unsigned Opcode,
Type *Ty) const = 0;
virtual bool preferAlternateOpcodeVectorization() const = 0;
virtual bool preferEpilogueVectorization() const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
@ -3140,9 +3135,6 @@ public:
bool preferInLoopReduction(unsigned Opcode, Type *Ty) const override {
return Impl.preferInLoopReduction(Opcode, Ty);
}
bool preferAlternateOpcodeVectorization() const override {
return Impl.preferAlternateOpcodeVectorization();
}
bool preferPredicatedReductionSelect(unsigned Opcode,
Type *Ty) const override {
return Impl.preferPredicatedReductionSelect(Opcode, Ty);

View File

@ -1007,7 +1007,6 @@ public:
bool preferFixedOverScalableIfEqualCost() const { return false; }
bool preferInLoopReduction(unsigned Opcode, Type *Ty) const { return false; }
bool preferAlternateOpcodeVectorization() const { return true; }
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty) const {
return false;

View File

@ -1384,10 +1384,6 @@ bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode,
return TTIImpl->preferInLoopReduction(Opcode, Ty);
}
bool TargetTransformInfo::preferAlternateOpcodeVectorization() const {
return TTIImpl->preferAlternateOpcodeVectorization();
}
bool TargetTransformInfo::preferPredicatedReductionSelect(unsigned Opcode,
Type *Ty) const {
return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty);

View File

@ -125,8 +125,6 @@ public:
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
bool preferAlternateOpcodeVectorization() const { return false; }
bool preferEpilogueVectorization() const {
// Epilogue vectorization is usually unprofitable - tail folding or
// a smaller VF would have been better. This a blunt hammer - we

View File

@ -292,7 +292,6 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool preferAlternateOpcodeVectorization() const { return false; }
bool prefersVectorizedAddressing() const;
bool supportsEfficientVectorElementLoadStore() const;
bool enableInterleavedAccessVectorization();

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -67,11 +67,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0
; GFX7-NEXT: v_readfirstlane_b32 s54, v0
; GFX7-NEXT: v_readfirstlane_b32 s59, v0
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s55, v23, 16
; GFX7-NEXT: v_readlane_b32 s54, v23, 15
@ -133,13 +133,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32
; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0
; GFX8-NEXT: v_readfirstlane_b32 s54, v0
; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v23, 16
; GFX8-NEXT: v_readlane_b32 s54, v23, 15
@ -200,13 +199,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
; GFX900-NEXT: v_readfirstlane_b32 s54, v0
; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v23, 16
; GFX900-NEXT: v_readlane_b32 s54, v23, 15
@ -265,13 +263,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_addc_u32 s59, s32, 0x4040
; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX942-NEXT: s_bitcmp1_b32 s59, 0
; GFX942-NEXT: s_bitset0_b32 s59, 0
; GFX942-NEXT: s_mov_b32 s54, s59
; GFX942-NEXT: s_addc_u32 s60, s32, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s60, 0
; GFX942-NEXT: s_bitset0_b32 s60, 0
; GFX942-NEXT: s_mov_b32 s59, s60
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v23, 16
; GFX942-NEXT: v_readlane_b32 s54, v23, 15
@ -332,11 +329,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32
; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
; GFX10_1-NEXT: v_readfirstlane_b32 s54, v24
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16
; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15
@ -397,11 +393,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32
; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
; GFX10_3-NEXT: v_readfirstlane_b32 s54, v24
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16
; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15
@ -461,14 +456,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040
; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX11-NEXT: s_addc_u32 s60, s32, 0x4040
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_bitcmp1_b32 s59, 0
; GFX11-NEXT: s_bitset0_b32 s59, 0
; GFX11-NEXT: s_mov_b32 s54, s59
; GFX11-NEXT: s_bitcmp1_b32 s60, 0
; GFX11-NEXT: s_bitset0_b32 s60, 0
; GFX11-NEXT: s_mov_b32 s59, s60
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v23, 16
; GFX11-NEXT: v_readlane_b32 s54, v23, 15
@ -530,15 +524,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_add_co_ci_u32 s59, s32, 0x4000
; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX12-NEXT: s_add_co_ci_u32 s60, s32, 0x4000
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_bitcmp1_b32 s59, 0
; GFX12-NEXT: s_bitset0_b32 s59, 0
; GFX12-NEXT: s_bitcmp1_b32 s60, 0
; GFX12-NEXT: s_bitset0_b32 s60, 0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 s54, s59
; GFX12-NEXT: s_mov_b32 s59, s60
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: v_readlane_b32 s55, v23, 16
; GFX12-NEXT: v_readlane_b32 s54, v23, 15
@ -586,7 +579,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s54},{scc}"(
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,
@ -636,9 +629,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32
; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22
; GFX7-NEXT: v_readfirstlane_b32 s54, v22
; GFX7-NEXT: v_readfirstlane_b32 s59, v22
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s55, v21, 16
; GFX7-NEXT: v_readlane_b32 s54, v21, 15
@ -693,11 +686,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32
; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22
; GFX8-NEXT: v_readfirstlane_b32 s54, v22
; GFX8-NEXT: v_readfirstlane_b32 s59, v22
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v21, 16
; GFX8-NEXT: v_readlane_b32 s54, v21, 15
@ -752,11 +744,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32
; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX900-NEXT: v_add_u32_e32 v22, 16, v22
; GFX900-NEXT: v_readfirstlane_b32 s54, v22
; GFX900-NEXT: v_readfirstlane_b32 s59, v22
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v21, 16
; GFX900-NEXT: v_readlane_b32 s54, v21, 15
@ -810,13 +801,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_addc_u32 s59, s32, 16
; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX942-NEXT: s_bitcmp1_b32 s59, 0
; GFX942-NEXT: s_bitset0_b32 s59, 0
; GFX942-NEXT: s_mov_b32 s54, s59
; GFX942-NEXT: s_addc_u32 s60, s32, 16
; GFX942-NEXT: s_bitcmp1_b32 s60, 0
; GFX942-NEXT: s_bitset0_b32 s60, 0
; GFX942-NEXT: s_mov_b32 s59, s60
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v21, 16
; GFX942-NEXT: v_readlane_b32 s54, v21, 15
@ -872,11 +862,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32
; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22
; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16
; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15
@ -932,11 +921,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32
; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22
; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16
; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15
@ -990,14 +978,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_addc_u32 s59, s32, 16
; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX11-NEXT: s_addc_u32 s60, s32, 16
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_bitcmp1_b32 s59, 0
; GFX11-NEXT: s_bitset0_b32 s59, 0
; GFX11-NEXT: s_mov_b32 s54, s59
; GFX11-NEXT: s_bitcmp1_b32 s60, 0
; GFX11-NEXT: s_bitset0_b32 s60, 0
; GFX11-NEXT: s_mov_b32 s59, s60
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v21, 16
; GFX11-NEXT: v_readlane_b32 s54, v21, 15
@ -1055,10 +1042,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX12-NEXT: s_mov_b32 s54, s32
; GFX12-NEXT: s_mov_b32 s59, s32
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_readlane_b32 s55, v21, 16
@ -1105,7 +1091,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s54},{scc}"(
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s59},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,
@ -1165,9 +1151,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s54, v22, 0
; GFX7-NEXT: v_readlane_b32 s59, v22, 0
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s55, v23, 16
; GFX7-NEXT: v_readlane_b32 s54, v23, 15
@ -1202,66 +1188,58 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_add_i32 s6, s32, 0x201100
; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_writelane_b32 v23, s30, 0
; GFX8-NEXT: v_writelane_b32 v23, s31, 1
; GFX8-NEXT: v_writelane_b32 v23, s33, 2
; GFX8-NEXT: v_writelane_b32 v23, s34, 3
; GFX8-NEXT: v_writelane_b32 v23, s35, 4
; GFX8-NEXT: v_writelane_b32 v23, s36, 5
; GFX8-NEXT: v_writelane_b32 v23, s37, 6
; GFX8-NEXT: v_writelane_b32 v23, s38, 7
; GFX8-NEXT: v_writelane_b32 v23, s39, 8
; GFX8-NEXT: v_writelane_b32 v23, s48, 9
; GFX8-NEXT: v_writelane_b32 v23, s49, 10
; GFX8-NEXT: v_writelane_b32 v23, s50, 11
; GFX8-NEXT: v_writelane_b32 v23, s51, 12
; GFX8-NEXT: v_writelane_b32 v23, s52, 13
; GFX8-NEXT: s_lshr_b32 s5, s32, 6
; GFX8-NEXT: v_writelane_b32 v23, s53, 14
; GFX8-NEXT: v_writelane_b32 v22, s30, 0
; GFX8-NEXT: v_writelane_b32 v22, s31, 1
; GFX8-NEXT: v_writelane_b32 v22, s33, 2
; GFX8-NEXT: v_writelane_b32 v22, s34, 3
; GFX8-NEXT: v_writelane_b32 v22, s35, 4
; GFX8-NEXT: v_writelane_b32 v22, s36, 5
; GFX8-NEXT: v_writelane_b32 v22, s37, 6
; GFX8-NEXT: v_writelane_b32 v22, s38, 7
; GFX8-NEXT: v_writelane_b32 v22, s39, 8
; GFX8-NEXT: v_writelane_b32 v22, s48, 9
; GFX8-NEXT: v_writelane_b32 v22, s49, 10
; GFX8-NEXT: v_writelane_b32 v22, s50, 11
; GFX8-NEXT: v_writelane_b32 v22, s51, 12
; GFX8-NEXT: v_writelane_b32 v22, s52, 13
; GFX8-NEXT: s_lshr_b32 s4, s32, 6
; GFX8-NEXT: v_writelane_b32 v22, s53, 14
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_add_i32 s4, s5, 0x4240
; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
; GFX8-NEXT: v_writelane_b32 v23, s54, 15
; GFX8-NEXT: s_add_i32 s59, s4, 0x4240
; GFX8-NEXT: v_writelane_b32 v22, s54, 15
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: v_writelane_b32 v22, s4, 0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: v_writelane_b32 v23, s55, 16
; GFX8-NEXT: v_writelane_b32 v22, s55, 16
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX8-NEXT: v_readlane_b32 s54, v22, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v23, 16
; GFX8-NEXT: v_readlane_b32 s54, v23, 15
; GFX8-NEXT: v_readlane_b32 s53, v23, 14
; GFX8-NEXT: v_readlane_b32 s52, v23, 13
; GFX8-NEXT: v_readlane_b32 s51, v23, 12
; GFX8-NEXT: v_readlane_b32 s50, v23, 11
; GFX8-NEXT: v_readlane_b32 s49, v23, 10
; GFX8-NEXT: v_readlane_b32 s48, v23, 9
; GFX8-NEXT: v_readlane_b32 s39, v23, 8
; GFX8-NEXT: v_readlane_b32 s38, v23, 7
; GFX8-NEXT: v_readlane_b32 s37, v23, 6
; GFX8-NEXT: v_readlane_b32 s36, v23, 5
; GFX8-NEXT: v_readlane_b32 s35, v23, 4
; GFX8-NEXT: v_readlane_b32 s34, v23, 3
; GFX8-NEXT: v_readlane_b32 s33, v23, 2
; GFX8-NEXT: v_readlane_b32 s31, v23, 1
; GFX8-NEXT: v_readlane_b32 s30, v23, 0
; GFX8-NEXT: v_readlane_b32 s55, v22, 16
; GFX8-NEXT: v_readlane_b32 s54, v22, 15
; GFX8-NEXT: v_readlane_b32 s53, v22, 14
; GFX8-NEXT: v_readlane_b32 s52, v22, 13
; GFX8-NEXT: v_readlane_b32 s51, v22, 12
; GFX8-NEXT: v_readlane_b32 s50, v22, 11
; GFX8-NEXT: v_readlane_b32 s49, v22, 10
; GFX8-NEXT: v_readlane_b32 s48, v22, 9
; GFX8-NEXT: v_readlane_b32 s39, v22, 8
; GFX8-NEXT: v_readlane_b32 s38, v22, 7
; GFX8-NEXT: v_readlane_b32 s37, v22, 6
; GFX8-NEXT: v_readlane_b32 s36, v22, 5
; GFX8-NEXT: v_readlane_b32 s35, v22, 4
; GFX8-NEXT: v_readlane_b32 s34, v22, 3
; GFX8-NEXT: v_readlane_b32 s33, v22, 2
; GFX8-NEXT: v_readlane_b32 s31, v22, 1
; GFX8-NEXT: v_readlane_b32 s30, v22, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_add_i32 s6, s32, 0x201100
; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@ -1272,66 +1250,58 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT: s_add_i32 s6, s32, 0x201100
; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v23, s30, 0
; GFX900-NEXT: v_writelane_b32 v23, s31, 1
; GFX900-NEXT: v_writelane_b32 v23, s33, 2
; GFX900-NEXT: v_writelane_b32 v23, s34, 3
; GFX900-NEXT: v_writelane_b32 v23, s35, 4
; GFX900-NEXT: v_writelane_b32 v23, s36, 5
; GFX900-NEXT: v_writelane_b32 v23, s37, 6
; GFX900-NEXT: v_writelane_b32 v23, s38, 7
; GFX900-NEXT: v_writelane_b32 v23, s39, 8
; GFX900-NEXT: v_writelane_b32 v23, s48, 9
; GFX900-NEXT: v_writelane_b32 v23, s49, 10
; GFX900-NEXT: v_writelane_b32 v23, s50, 11
; GFX900-NEXT: v_writelane_b32 v23, s51, 12
; GFX900-NEXT: v_writelane_b32 v23, s52, 13
; GFX900-NEXT: s_lshr_b32 s5, s32, 6
; GFX900-NEXT: v_writelane_b32 v23, s53, 14
; GFX900-NEXT: v_writelane_b32 v22, s30, 0
; GFX900-NEXT: v_writelane_b32 v22, s31, 1
; GFX900-NEXT: v_writelane_b32 v22, s33, 2
; GFX900-NEXT: v_writelane_b32 v22, s34, 3
; GFX900-NEXT: v_writelane_b32 v22, s35, 4
; GFX900-NEXT: v_writelane_b32 v22, s36, 5
; GFX900-NEXT: v_writelane_b32 v22, s37, 6
; GFX900-NEXT: v_writelane_b32 v22, s38, 7
; GFX900-NEXT: v_writelane_b32 v22, s39, 8
; GFX900-NEXT: v_writelane_b32 v22, s48, 9
; GFX900-NEXT: v_writelane_b32 v22, s49, 10
; GFX900-NEXT: v_writelane_b32 v22, s50, 11
; GFX900-NEXT: v_writelane_b32 v22, s51, 12
; GFX900-NEXT: v_writelane_b32 v22, s52, 13
; GFX900-NEXT: s_lshr_b32 s4, s32, 6
; GFX900-NEXT: v_writelane_b32 v22, s53, 14
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: s_add_i32 s4, s5, 0x4240
; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
; GFX900-NEXT: v_writelane_b32 v23, s54, 15
; GFX900-NEXT: s_add_i32 s59, s4, 0x4240
; GFX900-NEXT: v_writelane_b32 v22, s54, 15
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: v_writelane_b32 v22, s4, 0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: v_writelane_b32 v23, s55, 16
; GFX900-NEXT: v_writelane_b32 v22, s55, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX900-NEXT: v_readlane_b32 s54, v22, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v23, 16
; GFX900-NEXT: v_readlane_b32 s54, v23, 15
; GFX900-NEXT: v_readlane_b32 s53, v23, 14
; GFX900-NEXT: v_readlane_b32 s52, v23, 13
; GFX900-NEXT: v_readlane_b32 s51, v23, 12
; GFX900-NEXT: v_readlane_b32 s50, v23, 11
; GFX900-NEXT: v_readlane_b32 s49, v23, 10
; GFX900-NEXT: v_readlane_b32 s48, v23, 9
; GFX900-NEXT: v_readlane_b32 s39, v23, 8
; GFX900-NEXT: v_readlane_b32 s38, v23, 7
; GFX900-NEXT: v_readlane_b32 s37, v23, 6
; GFX900-NEXT: v_readlane_b32 s36, v23, 5
; GFX900-NEXT: v_readlane_b32 s35, v23, 4
; GFX900-NEXT: v_readlane_b32 s34, v23, 3
; GFX900-NEXT: v_readlane_b32 s33, v23, 2
; GFX900-NEXT: v_readlane_b32 s31, v23, 1
; GFX900-NEXT: v_readlane_b32 s30, v23, 0
; GFX900-NEXT: v_readlane_b32 s55, v22, 16
; GFX900-NEXT: v_readlane_b32 s54, v22, 15
; GFX900-NEXT: v_readlane_b32 s53, v22, 14
; GFX900-NEXT: v_readlane_b32 s52, v22, 13
; GFX900-NEXT: v_readlane_b32 s51, v22, 12
; GFX900-NEXT: v_readlane_b32 s50, v22, 11
; GFX900-NEXT: v_readlane_b32 s49, v22, 10
; GFX900-NEXT: v_readlane_b32 s48, v22, 9
; GFX900-NEXT: v_readlane_b32 s39, v22, 8
; GFX900-NEXT: v_readlane_b32 s38, v22, 7
; GFX900-NEXT: v_readlane_b32 s37, v22, 6
; GFX900-NEXT: v_readlane_b32 s36, v22, 5
; GFX900-NEXT: v_readlane_b32 s35, v22, 4
; GFX900-NEXT: v_readlane_b32 s34, v22, 3
; GFX900-NEXT: v_readlane_b32 s33, v22, 2
; GFX900-NEXT: v_readlane_b32 s31, v22, 1
; GFX900-NEXT: v_readlane_b32 s30, v22, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_add_i32 s6, s32, 0x201100
; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@ -1369,12 +1339,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_add_i32 s58, s32, 0x4240
; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX942-NEXT: s_add_i32 s59, s32, 0x4240
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-NEXT: s_mov_b32 s54, s58
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v22, 16
; GFX942-NEXT: v_readlane_b32 s54, v22, 15
@ -1411,7 +1379,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240
; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240
; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
@ -1436,10 +1404,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_1-NEXT: s_mov_b32 s54, s58
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16
; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15
@ -1476,7 +1442,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240
; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240
; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
@ -1501,10 +1467,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_3-NEXT: s_mov_b32 s54, s58
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16
; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15
@ -1539,7 +1503,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v22, s30, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
; GFX11-NEXT: s_add_i32 s58, s32, 0x4240
; GFX11-NEXT: s_add_i32 s59, s32, 0x4240
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: v_writelane_b32 v22, s31, 1
@ -1564,10 +1528,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX11-NEXT: s_mov_b32 s54, s58
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s55, v22, 16
@ -1606,7 +1568,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v22, s30, 0
; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200
; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: ;;#ASMSTART
@ -1631,12 +1593,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 s54, s58
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_readlane_b32 s55, v22, 16
; GFX12-NEXT: v_readlane_b32 s54, v22, 15
; GFX12-NEXT: v_readlane_b32 s53, v22, 14
@ -1684,7 +1644,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9",
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s54},{scc}"(
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s59},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,

View File

@ -44,7 +44,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s64},{scc}"(
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,

View File

@ -17,12 +17,15 @@
define void @s116_modified(ptr %a) {
; CHECK-LABEL: @s116_modified(
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds float, ptr [[GEP1:%.*]], i64 4
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds float, ptr [[GEP1:%.*]], i64 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[GEP1]], i64 3
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 2, i32 3, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[LD0]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[LD0]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP2]], i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP6]], <4 x i32> <i32 1, i32 1, i32 5, i32 6>
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP7]]
; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[GEP1]], align 4
; CHECK-NEXT: ret void

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v | FileCheck %s
; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-20 | FileCheck %s
; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-15 | FileCheck %s --check-prefix=THR15
define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.ptr, ptr %add.ptr64) {
@ -17,122 +17,78 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
; CHECK-NEXT: [[TMP115:%.*]] = load i8, ptr null, align 1
; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr null, align 1
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr null, align 1
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32>
; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = shl <4 x i32> [[TMP11]], splat (i32 16)
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
; CHECK-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32>
; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
; CHECK-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[TMP24]] to <4 x i32>
; CHECK-NEXT: [[TMP26:%.*]] = sub <4 x i32> [[TMP23]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
; CHECK-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
; CHECK-NEXT: [[TMP29:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
; CHECK-NEXT: [[TMP30:%.*]] = zext <4 x i8> [[TMP29]] to <4 x i32>
; CHECK-NEXT: [[TMP31:%.*]] = sub <4 x i32> [[TMP28]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = shl <4 x i32> [[TMP31]], splat (i32 16)
; CHECK-NEXT: [[TMP33:%.*]] = add <4 x i32> [[TMP32]], [[TMP26]]
; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]]
; CHECK-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]]
; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]]
; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32>
; CHECK-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
; CHECK-NEXT: [[TMP45:%.*]] = zext <4 x i8> [[TMP44]] to <4 x i32>
; CHECK-NEXT: [[TMP46:%.*]] = sub <4 x i32> [[TMP43]], [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
; CHECK-NEXT: [[TMP48:%.*]] = zext <4 x i8> [[TMP47]] to <4 x i32>
; CHECK-NEXT: [[TMP49:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i8> [[TMP49]] to <4 x i32>
; CHECK-NEXT: [[TMP51:%.*]] = sub <4 x i32> [[TMP48]], [[TMP50]]
; CHECK-NEXT: [[TMP52:%.*]] = shl <4 x i32> [[TMP51]], splat (i32 16)
; CHECK-NEXT: [[TMP53:%.*]] = add <4 x i32> [[TMP52]], [[TMP46]]
; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]]
; CHECK-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]]
; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1
; CHECK-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32>
; CHECK-NEXT: [[TMP65:%.*]] = load <4 x i8>, ptr null, align 1
; CHECK-NEXT: [[TMP66:%.*]] = zext <4 x i8> [[TMP65]] to <4 x i32>
; CHECK-NEXT: [[TMP67:%.*]] = sub <4 x i32> [[TMP64]], [[TMP66]]
; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP115]], i32 0
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP71:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP70]], <2 x i8> [[TMP62]], i64 2)
; CHECK-NEXT: [[TMP72:%.*]] = zext <4 x i8> [[TMP71]] to <4 x i32>
; CHECK-NEXT: [[TMP73:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
; CHECK-NEXT: [[TMP74:%.*]] = zext <4 x i8> [[TMP73]] to <4 x i32>
; CHECK-NEXT: [[TMP75:%.*]] = shufflevector <4 x i32> [[TMP74]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP76:%.*]] = sub <4 x i32> [[TMP72]], [[TMP75]]
; CHECK-NEXT: [[TMP77:%.*]] = shl <4 x i32> [[TMP76]], splat (i32 16)
; CHECK-NEXT: [[TMP78:%.*]] = add <4 x i32> [[TMP77]], [[TMP68]]
; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <4 x i32> [[TMP78]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP80:%.*]] = add <4 x i32> [[TMP78]], [[TMP79]]
; CHECK-NEXT: [[TMP81:%.*]] = sub <4 x i32> [[TMP78]], [[TMP79]]
; CHECK-NEXT: [[TMP82:%.*]] = shufflevector <4 x i32> [[TMP80]], <4 x i32> [[TMP81]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <4 x i32> [[TMP82]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP82]], [[TMP83]]
; CHECK-NEXT: [[TMP85:%.*]] = sub <4 x i32> [[TMP82]], [[TMP83]]
; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <4 x i32> [[TMP84]], <4 x i32> [[TMP85]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP41]], [[TMP21]]
; CHECK-NEXT: [[TMP88:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]]
; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP90:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP89]], <4 x i32> [[TMP87]], i64 4)
; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]]
; CHECK-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]]
; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP94:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP93]], <4 x i32> [[TMP91]], i64 4)
; CHECK-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]]
; CHECK-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]]
; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 12, i32 8, i32 13, i32 9, i32 14, i32 10, i32 15, i32 11>
; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <16 x i32> [[TMP98]], <16 x i32> [[TMP99]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP102:%.*]] = shufflevector <16 x i32> [[TMP100]], <16 x i32> [[TMP101]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 18, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP104:%.*]] = shufflevector <16 x i32> [[TMP102]], <16 x i32> [[TMP103]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 poison, i32 18, i32 19>
; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 poison, i32 17, i32 poison, i32 12, i32 poison, i32 14, i32 15>
; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP106]], <16 x i32> [[TMP107]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 10, i32 17, i32 12, i32 18, i32 14, i32 15>
; CHECK-NEXT: [[TMP109:%.*]] = lshr <16 x i32> [[TMP108]], splat (i32 15)
; CHECK-NEXT: [[TMP110:%.*]] = and <16 x i32> [[TMP109]], splat (i32 65537)
; CHECK-NEXT: [[TMP111:%.*]] = mul <16 x i32> [[TMP110]], splat (i32 65535)
; CHECK-NEXT: [[TMP112:%.*]] = add <16 x i32> [[TMP111]], [[TMP97]]
; CHECK-NEXT: [[TMP113:%.*]] = xor <16 x i32> [[TMP112]], [[TMP108]]
; CHECK-NEXT: [[TMP114:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP113]])
; CHECK-NEXT: ret i32 [[TMP114]]
; CHECK-NEXT: [[TMP92:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1
; CHECK-NEXT: [[TMP95:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP98:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
; CHECK-NEXT: [[TMP132:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
; CHECK-NEXT: [[TMP135:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
; CHECK-NEXT: [[TMP138:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1
; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0)
; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4)
; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8)
; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12)
; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32>
; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1
; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0)
; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4)
; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP92]], i64 8)
; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP132]], i64 12)
; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
; CHECK-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> <i32 3, i32 7, i32 15, i32 11, i32 2, i32 6, i32 14, i32 10, i32 1, i32 5, i32 13, i32 9, i32 0, i32 4, i32 12, i32 8>
; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> <i32 3, i32 4, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 0, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 19, i32 poison, i32 4, i32 poison, i32 18, i32 poison, i32 8, i32 poison, i32 17, i32 poison, i32 12, i32 13, i32 16, i32 poison>
; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 poison, i32 6, i32 18, i32 8, i32 poison, i32 10, i32 17, i32 12, i32 13, i32 14, i32 16>
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP3]], i32 5
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP52]], i32 9
; CHECK-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32>
; CHECK-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0)
; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4)
; CHECK-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP98]], i64 8)
; CHECK-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP138]], i64 12)
; CHECK-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32>
; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 3, i32 7, i32 15, i32 11, i32 2, i32 6, i32 14, i32 10, i32 1, i32 5, i32 13, i32 9, i32 0, i32 4, i32 12, i32 8>
; CHECK-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16)
; CHECK-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]]
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP70]]
; CHECK-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP70]]
; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]]
; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 4, i32 21, i32 22, i32 7, i32 8, i32 25, i32 26, i32 11, i32 12, i32 29, i32 30, i32 15>
; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]]
; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> <i32 0, i32 20, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 28, i32 29, i32 30, i32 11, i32 24, i32 25, i32 26, i32 27>
; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15)
; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537)
; CHECK-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535)
; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]]
; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]]
; CHECK-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
; CHECK-NEXT: ret i32 [[ADD113_3]]
;
; THR15-LABEL: define i32 @test(
; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] {
@ -148,122 +104,78 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
; THR15-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
; THR15-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
; THR15-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4
; THR15-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
; THR15-NEXT: [[TMP48:%.*]] = load i8, ptr null, align 1
; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr null, align 1
; THR15-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1
; THR15-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; THR15-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1
; THR15-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; THR15-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]]
; THR15-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
; THR15-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32>
; THR15-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
; THR15-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32>
; THR15-NEXT: [[TMP11:%.*]] = sub <4 x i32> [[TMP8]], [[TMP10]]
; THR15-NEXT: [[TMP12:%.*]] = shl <4 x i32> [[TMP11]], splat (i32 16)
; THR15-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
; THR15-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]]
; THR15-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]]
; THR15-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; THR15-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]]
; THR15-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]]
; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; THR15-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
; THR15-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32>
; THR15-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
; THR15-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[TMP24]] to <4 x i32>
; THR15-NEXT: [[TMP26:%.*]] = sub <4 x i32> [[TMP23]], [[TMP25]]
; THR15-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
; THR15-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
; THR15-NEXT: [[TMP29:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
; THR15-NEXT: [[TMP30:%.*]] = zext <4 x i8> [[TMP29]] to <4 x i32>
; THR15-NEXT: [[TMP31:%.*]] = sub <4 x i32> [[TMP28]], [[TMP30]]
; THR15-NEXT: [[TMP32:%.*]] = shl <4 x i32> [[TMP31]], splat (i32 16)
; THR15-NEXT: [[TMP33:%.*]] = add <4 x i32> [[TMP32]], [[TMP26]]
; THR15-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]]
; THR15-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]]
; THR15-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]]
; THR15-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]]
; THR15-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; THR15-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
; THR15-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32>
; THR15-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
; THR15-NEXT: [[TMP45:%.*]] = zext <4 x i8> [[TMP44]] to <4 x i32>
; THR15-NEXT: [[TMP46:%.*]] = sub <4 x i32> [[TMP43]], [[TMP45]]
; THR15-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
; THR15-NEXT: [[TMP48:%.*]] = zext <4 x i8> [[TMP47]] to <4 x i32>
; THR15-NEXT: [[TMP49:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
; THR15-NEXT: [[TMP50:%.*]] = zext <4 x i8> [[TMP49]] to <4 x i32>
; THR15-NEXT: [[TMP51:%.*]] = sub <4 x i32> [[TMP48]], [[TMP50]]
; THR15-NEXT: [[TMP52:%.*]] = shl <4 x i32> [[TMP51]], splat (i32 16)
; THR15-NEXT: [[TMP53:%.*]] = add <4 x i32> [[TMP52]], [[TMP46]]
; THR15-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]]
; THR15-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]]
; THR15-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> <i32 2, i32 7, i32 0, i32 5>
; THR15-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]]
; THR15-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]]
; THR15-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; THR15-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2)
; THR15-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1
; THR15-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32>
; THR15-NEXT: [[TMP65:%.*]] = load <4 x i8>, ptr null, align 1
; THR15-NEXT: [[TMP66:%.*]] = zext <4 x i8> [[TMP65]] to <4 x i32>
; THR15-NEXT: [[TMP67:%.*]] = sub <4 x i32> [[TMP64]], [[TMP66]]
; THR15-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; THR15-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0
; THR15-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1
; THR15-NEXT: [[TMP71:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP70]], <2 x i8> [[TMP62]], i64 2)
; THR15-NEXT: [[TMP72:%.*]] = zext <4 x i8> [[TMP71]] to <4 x i32>
; THR15-NEXT: [[TMP73:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
; THR15-NEXT: [[TMP74:%.*]] = zext <4 x i8> [[TMP73]] to <4 x i32>
; THR15-NEXT: [[TMP75:%.*]] = shufflevector <4 x i32> [[TMP74]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; THR15-NEXT: [[TMP76:%.*]] = sub <4 x i32> [[TMP72]], [[TMP75]]
; THR15-NEXT: [[TMP77:%.*]] = shl <4 x i32> [[TMP76]], splat (i32 16)
; THR15-NEXT: [[TMP78:%.*]] = add <4 x i32> [[TMP77]], [[TMP68]]
; THR15-NEXT: [[TMP79:%.*]] = shufflevector <4 x i32> [[TMP78]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; THR15-NEXT: [[TMP80:%.*]] = add <4 x i32> [[TMP78]], [[TMP79]]
; THR15-NEXT: [[TMP81:%.*]] = sub <4 x i32> [[TMP78]], [[TMP79]]
; THR15-NEXT: [[TMP82:%.*]] = shufflevector <4 x i32> [[TMP80]], <4 x i32> [[TMP81]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; THR15-NEXT: [[TMP83:%.*]] = shufflevector <4 x i32> [[TMP82]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; THR15-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP82]], [[TMP83]]
; THR15-NEXT: [[TMP85:%.*]] = sub <4 x i32> [[TMP82]], [[TMP83]]
; THR15-NEXT: [[TMP86:%.*]] = shufflevector <4 x i32> [[TMP84]], <4 x i32> [[TMP85]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; THR15-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP41]], [[TMP21]]
; THR15-NEXT: [[TMP88:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]]
; THR15-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP90:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP89]], <4 x i32> [[TMP87]], i64 4)
; THR15-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]]
; THR15-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]]
; THR15-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP94:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP93]], <4 x i32> [[TMP91]], i64 4)
; THR15-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]]
; THR15-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]]
; THR15-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 12, i32 8, i32 13, i32 9, i32 14, i32 10, i32 15, i32 11>
; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP99:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP100:%.*]] = shufflevector <16 x i32> [[TMP98]], <16 x i32> [[TMP99]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP102:%.*]] = shufflevector <16 x i32> [[TMP100]], <16 x i32> [[TMP101]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 18, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP103:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP104:%.*]] = shufflevector <16 x i32> [[TMP102]], <16 x i32> [[TMP103]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 poison, i32 18, i32 19>
; THR15-NEXT: [[TMP105:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP106:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 poison, i32 17, i32 poison, i32 12, i32 poison, i32 14, i32 15>
; THR15-NEXT: [[TMP107:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP106]], <16 x i32> [[TMP107]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 10, i32 17, i32 12, i32 18, i32 14, i32 15>
; THR15-NEXT: [[TMP109:%.*]] = lshr <16 x i32> [[TMP108]], splat (i32 15)
; THR15-NEXT: [[TMP110:%.*]] = and <16 x i32> [[TMP109]], splat (i32 65537)
; THR15-NEXT: [[TMP111:%.*]] = mul <16 x i32> [[TMP110]], splat (i32 65535)
; THR15-NEXT: [[TMP112:%.*]] = add <16 x i32> [[TMP111]], [[TMP97]]
; THR15-NEXT: [[TMP113:%.*]] = xor <16 x i32> [[TMP112]], [[TMP108]]
; THR15-NEXT: [[TMP114:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP113]])
; THR15-NEXT: ret i32 [[TMP114]]
; THR15-NEXT: [[TMP143:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1
; THR15-NEXT: [[TMP146:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
; THR15-NEXT: [[TMP147:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
; THR15-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
; THR15-NEXT: [[TMP148:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
; THR15-NEXT: [[TMP152:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
; THR15-NEXT: [[TMP153:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
; THR15-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
; THR15-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
; THR15-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
; THR15-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1
; THR15-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0)
; THR15-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4)
; THR15-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8)
; THR15-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12)
; THR15-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32>
; THR15-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1
; THR15-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0)
; THR15-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4)
; THR15-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP143]], i64 8)
; THR15-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP148]], i64 12)
; THR15-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32>
; THR15-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]]
; THR15-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> <i32 3, i32 7, i32 15, i32 11, i32 2, i32 6, i32 14, i32 10, i32 1, i32 5, i32 13, i32 9, i32 0, i32 4, i32 12, i32 8>
; THR15-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2)
; THR15-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; THR15-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> <i32 3, i32 4, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 0, i32 5, i32 poison, i32 poison>
; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 19, i32 poison, i32 4, i32 poison, i32 18, i32 poison, i32 8, i32 poison, i32 17, i32 poison, i32 12, i32 13, i32 16, i32 poison>
; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; THR15-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 poison, i32 6, i32 18, i32 8, i32 poison, i32 10, i32 17, i32 12, i32 13, i32 14, i32 16>
; THR15-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP1]], i32 5
; THR15-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP48]], i32 9
; THR15-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32>
; THR15-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
; THR15-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0)
; THR15-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4)
; THR15-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP147]], i64 8)
; THR15-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP153]], i64 12)
; THR15-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32>
; THR15-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 3, i32 7, i32 15, i32 11, i32 2, i32 6, i32 14, i32 10, i32 1, i32 5, i32 13, i32 9, i32 0, i32 4, i32 12, i32 8>
; THR15-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]]
; THR15-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16)
; THR15-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]]
; THR15-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
; THR15-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP70]]
; THR15-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP70]]
; THR15-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; THR15-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; THR15-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP52]]
; THR15-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP52]]
; THR15-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; THR15-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; THR15-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]]
; THR15-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]]
; THR15-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 4, i32 21, i32 22, i32 7, i32 8, i32 25, i32 26, i32 11, i32 12, i32 29, i32 30, i32 15>
; THR15-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; THR15-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]]
; THR15-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]]
; THR15-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
; THR15-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> <i32 0, i32 20, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 28, i32 29, i32 30, i32 11, i32 24, i32 25, i32 26, i32 27>
; THR15-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15)
; THR15-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537)
; THR15-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535)
; THR15-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]]
; THR15-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]]
; THR15-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
; THR15-NEXT: ret i32 [[ADD113_3]]
;
entry:
%0 = load i8, ptr %pix1, align 1

View File

@ -1022,8 +1022,10 @@ define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP1]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP5]], <2 x i32> [[TMP3]], i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP7]], <2 x i32> [[TMP4]], i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])

View File

@ -1,45 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s
define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
; SSE2-LABEL: @sitofp_uitofp(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[TMP5]]
;
; SLM-LABEL: @sitofp_uitofp(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[TMP5]]
;
; AVX-LABEL: @sitofp_uitofp(
; AVX-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x float> [[TMP3]]
;
; AVX2-LABEL: @sitofp_uitofp(
; AVX2-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX2-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x float> [[TMP3]]
;
; AVX512-LABEL: @sitofp_uitofp(
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x float> [[TMP3]]
; CHECK-LABEL: @sitofp_uitofp(
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
@ -69,39 +41,11 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
}
define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
; SSE2-LABEL: @fptosi_fptoui(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x i32> [[TMP5]]
;
; SLM-LABEL: @fptosi_fptoui(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX-LABEL: @fptosi_fptoui(
; AVX-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX2-LABEL: @fptosi_fptoui(
; AVX2-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX2-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @fptosi_fptoui(
; AVX512-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
; CHECK-LABEL: @fptosi_fptoui(
; CHECK-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -131,39 +75,11 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
}
define <8 x float> @fneg_fabs(<8 x float> %a) {
; SSE2-LABEL: @fneg_fabs(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
; SSE2-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; SLM-LABEL: @fneg_fabs(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
; SLM-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX-LABEL: @fneg_fabs(
; AVX-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; AVX-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX2-LABEL: @fneg_fabs(
; AVX2-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; AVX2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX512-LABEL: @fneg_fabs(
; AVX512-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; AVX512-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x float> [[DOTUNCASTED]]
; CHECK-LABEL: @fneg_fabs(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; CHECK-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -209,39 +125,11 @@ define <8 x float> @fneg_fabs(<8 x float> %a) {
}
define <8 x i32> @sext_zext(<8 x i16> %a) {
; SSE2-LABEL: @sext_zext(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x i32> [[TMP5]]
;
; SLM-LABEL: @sext_zext(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX-LABEL: @sext_zext(
; AVX-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX2-LABEL: @sext_zext(
; AVX2-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX2-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @sext_zext(
; AVX512-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
; CHECK-LABEL: @sext_zext(
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1

View File

@ -1,45 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s
define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
; SSE2-LABEL: @sitofp_uitofp(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[TMP5]]
;
; SLM-LABEL: @sitofp_uitofp(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[TMP5]]
;
; AVX-LABEL: @sitofp_uitofp(
; AVX-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x float> [[TMP3]]
;
; AVX2-LABEL: @sitofp_uitofp(
; AVX2-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX2-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x float> [[TMP3]]
;
; AVX512-LABEL: @sitofp_uitofp(
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x float> [[TMP3]]
; CHECK-LABEL: @sitofp_uitofp(
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float>
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
@ -69,39 +41,11 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
}
define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
; SSE2-LABEL: @fptosi_fptoui(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x i32> [[TMP5]]
;
; SLM-LABEL: @fptosi_fptoui(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX-LABEL: @fptosi_fptoui(
; AVX-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX2-LABEL: @fptosi_fptoui(
; AVX2-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX2-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @fptosi_fptoui(
; AVX512-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
; CHECK-LABEL: @fptosi_fptoui(
; CHECK-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[A]] to <8 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -131,39 +75,11 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
}
define <8 x float> @fneg_fabs(<8 x float> %a) {
; SSE2-LABEL: @fneg_fabs(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
; SSE2-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
; SSE2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; SLM-LABEL: @fneg_fabs(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP3:%.*]] = fneg <4 x float> [[TMP1]]
; SLM-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP2]])
; SLM-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX-LABEL: @fneg_fabs(
; AVX-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; AVX-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX2-LABEL: @fneg_fabs(
; AVX2-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; AVX2-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
; AVX512-LABEL: @fneg_fabs(
; AVX512-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; AVX512-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x float> [[DOTUNCASTED]]
; CHECK-LABEL: @fneg_fabs(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
; CHECK-NEXT: [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> [[DOTUNCASTED]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -209,39 +125,11 @@ define <8 x float> @fneg_fabs(<8 x float> %a) {
}
define <8 x i32> @sext_zext(<8 x i16> %a) {
; SSE2-LABEL: @sext_zext(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE2-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE2-NEXT: ret <8 x i32> [[TMP5]]
;
; SLM-LABEL: @sext_zext(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX-LABEL: @sext_zext(
; AVX-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX2-LABEL: @sext_zext(
; AVX2-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX2-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @sext_zext(
; AVX512-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
; CHECK-LABEL: @sext_zext(
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1

View File

@ -1,47 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fadd_fsub_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SSE-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SSE-NEXT: ret <8 x float> [[TMP5]]
;
; SLM-LABEL: @fadd_fsub_v8f32(
; SLM-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SLM-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SLM-NEXT: ret <8 x float> [[TMP5]]
;
; AVX-LABEL: @fadd_fsub_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; AVX-NEXT: ret <8 x float> [[TMP5]]
;
; AVX2-LABEL: @fadd_fsub_v8f32(
; AVX2-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX2-NEXT: ret <8 x float> [[TMP3]]
;
; AVX512-LABEL: @fadd_fsub_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[TMP3]]
; CHECK-LABEL: @fadd_fsub_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -79,43 +49,11 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
}
define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fmul_fdiv_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SSE-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SSE-NEXT: ret <8 x float> [[TMP5]]
;
; SLM-LABEL: @fmul_fdiv_v8f32(
; SLM-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SLM-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SLM-NEXT: ret <8 x float> [[TMP5]]
;
; AVX-LABEL: @fmul_fdiv_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; AVX-NEXT: ret <8 x float> [[TMP5]]
;
; AVX2-LABEL: @fmul_fdiv_v8f32(
; AVX2-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; AVX2-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; AVX2-NEXT: ret <8 x float> [[TMP5]]
;
; AVX512-LABEL: @fmul_fdiv_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[TMP3]]
; CHECK-LABEL: @fmul_fdiv_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -172,10 +110,6 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
; AVX-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX-NEXT: ret <4 x float> [[TMP1]]
;
; AVX2-LABEL: @fmul_fdiv_v4f32_const(
; AVX2-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX2-NEXT: ret <4 x float> [[TMP1]]
;
; AVX512-LABEL: @fmul_fdiv_v4f32_const(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX512-NEXT: ret <4 x float> [[TMP1]]

View File

@ -1,47 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fadd_fsub_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SSE-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SSE-NEXT: ret <8 x float> [[TMP5]]
;
; SLM-LABEL: @fadd_fsub_v8f32(
; SLM-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SLM-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SLM-NEXT: ret <8 x float> [[TMP5]]
;
; AVX-LABEL: @fadd_fsub_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; AVX-NEXT: ret <8 x float> [[TMP5]]
;
; AVX2-LABEL: @fadd_fsub_v8f32(
; AVX2-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX2-NEXT: ret <8 x float> [[TMP3]]
;
; AVX512-LABEL: @fadd_fsub_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[TMP3]]
; CHECK-LABEL: @fadd_fsub_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -79,43 +49,11 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
}
define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fmul_fdiv_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SSE-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SSE-NEXT: ret <8 x float> [[TMP5]]
;
; SLM-LABEL: @fmul_fdiv_v8f32(
; SLM-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; SLM-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; SLM-NEXT: ret <8 x float> [[TMP5]]
;
; AVX-LABEL: @fmul_fdiv_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; AVX-NEXT: ret <8 x float> [[TMP5]]
;
; AVX2-LABEL: @fmul_fdiv_v8f32(
; AVX2-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
; AVX2-NEXT: [[TMP3:%.*]] = fmul <8 x float> [[A]], [[B]]
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 4, i32 5, i32 1, i32 2, i32 6, i32 7, i32 3>
; AVX2-NEXT: ret <8 x float> [[TMP5]]
;
; AVX512-LABEL: @fmul_fdiv_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[TMP3]]
; CHECK-LABEL: @fmul_fdiv_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
@ -172,10 +110,6 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
; AVX-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX-NEXT: ret <4 x float> [[TMP1]]
;
; AVX2-LABEL: @fmul_fdiv_v4f32_const(
; AVX2-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX2-NEXT: ret <4 x float> [[TMP1]]
;
; AVX512-LABEL: @fmul_fdiv_v4f32_const(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX512-NEXT: ret <4 x float> [[TMP1]]

View File

@ -7,39 +7,11 @@
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @add_sub_v8i32(
; SSE-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: ret <8 x i32> [[TMP5]]
;
; SLM-LABEL: @add_sub_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX1-LABEL: @add_sub_v8i32(
; AVX1-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX1-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX2-LABEL: @add_sub_v8i32(
; AVX2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @add_sub_v8i32(
; AVX512-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
; CHECK-LABEL: @add_sub_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
@ -134,16 +106,14 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: ret <8 x i32> [[TMP5]]
; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: ret <8 x i32> [[R71]]
;
; SLM-LABEL: @ashr_shl_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX1-LABEL: @ashr_shl_v8i32(
; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
@ -204,16 +174,16 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[TMP5]]
; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[R71]]
;
; SLM-LABEL: @ashr_shl_v8i32_const(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[R71]]
;
; AVX1-LABEL: @ashr_shl_v8i32_const(
; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
@ -531,49 +501,13 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
}
define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
; SSE-LABEL: @add_sub_v8i32_splat(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]]
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[TMP7]]
;
; SLM-LABEL: @add_sub_v8i32_splat(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; SLM-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]]
; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP7]]
;
; AVX1-LABEL: @add_sub_v8i32_splat(
; AVX1-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; AVX1-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX2-LABEL: @add_sub_v8i32_splat(
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; AVX2-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX512-LABEL: @add_sub_v8i32_splat(
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; AVX512-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP5]]
; CHECK-LABEL: @add_sub_v8i32_splat(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP5]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1

View File

@ -7,39 +7,11 @@
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @add_sub_v8i32(
; SSE-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: ret <8 x i32> [[TMP5]]
;
; SLM-LABEL: @add_sub_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX1-LABEL: @add_sub_v8i32(
; AVX1-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX1-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX2-LABEL: @add_sub_v8i32(
; AVX2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX2-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @add_sub_v8i32(
; AVX512-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
; CHECK-LABEL: @add_sub_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = sub <8 x i32> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
@ -134,16 +106,14 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: ret <8 x i32> [[TMP5]]
; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE-NEXT: ret <8 x i32> [[R71]]
;
; SLM-LABEL: @ashr_shl_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX1-LABEL: @ashr_shl_v8i32(
; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
@ -204,16 +174,16 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[TMP5]]
; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[R71]]
;
; SLM-LABEL: @ashr_shl_v8i32_const(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP5]]
; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[R71]]
;
; AVX1-LABEL: @ashr_shl_v8i32_const(
; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
@ -531,49 +501,13 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
}
define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
; SSE-LABEL: @add_sub_v8i32_splat(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]]
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[TMP7]]
;
; SLM-LABEL: @add_sub_v8i32_splat(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; SLM-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]]
; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[TMP7]]
;
; AVX1-LABEL: @add_sub_v8i32_splat(
; AVX1-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; AVX1-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX2-LABEL: @add_sub_v8i32_splat(
; AVX2-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; AVX2-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX2-NEXT: ret <8 x i32> [[TMP5]]
;
; AVX512-LABEL: @add_sub_v8i32_splat(
; AVX512-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; AVX512-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[TMP5]]
; CHECK-LABEL: @add_sub_v8i32_splat(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP5]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1

View File

@ -7,7 +7,7 @@ define void @test() {
; CHECK-NEXT: [[ADD:%.*]] = add i32 1, 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ADD]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[ICMP:%.*]] = icmp samesign ult i32 0, 0
; CHECK-NEXT: [[ICMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 [[ZEXT]]
@ -16,8 +16,6 @@ define void @test() {
; CHECK-NEXT: [[CALL:%.*]] = call i32 null(<2 x double> zeroinitializer)
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[CALL]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4)
; CHECK-NEXT: ret void
;
bb:

View File

@ -15,8 +15,8 @@ define ptr @test(ptr %0, ptr %args_gep) {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[ARG26]], i64 17
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ARG1]], i64 12
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8, !noalias [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !noalias [[META0]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !noalias [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8, !noalias [[META0]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>

View File

@ -9,10 +9,10 @@ define void @test(ptr noalias %0, ptr noalias %1) {
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[TMP9]], align 16
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x double>, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP10]], <6 x i32> <i32 2, i32 4, i32 0, i32 3, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP7]], <6 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 5>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <6 x double> [[TMP12]], <6 x double> [[TMP14]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 11>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <6 x i32> <i32 2, i32 4, i32 0, i32 3, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> [[TMP7]], <6 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 5>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <6 x double> [[TMP12]], <6 x double> [[TMP10]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 11>
; CHECK-NEXT: store <6 x double> [[TMP13]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 40
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 8

View File

@ -286,8 +286,8 @@ define void @lookahead_limit_users_budget(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S
; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8
; CHECK-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B]], align 8
; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0

View File

@ -10,24 +10,22 @@ define i32 @bar() local_unnamed_addr {
; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef
; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef>, i32 [[SUB102_1]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[ADD94_1]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef>, i32 [[SUB86_1]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[ADD78_2]], i32 5
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[SUB102_3]], i32 6
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef>, <8 x i32> <i32 8, i32 9, i32 3, i32 2, i32 5, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 14>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <8 x i32> [[TMP7]], [[TMP9]]
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP18]], <8 x i32> [[TMP10]], i64 8)
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 [[SUB102_1]], i32 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 6
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 undef, i32 undef, i32 poison>, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 7, i32 6, i32 5, i32 4, i32 24, i32 25, i32 26, i32 27, i32 poison, i32 29, i32 30, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[SUB102_3]], i32 12
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12>
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], splat (i32 15)
; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], splat (i32 65537)
; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], splat (i32 65535)
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP15:%.*]] = add <16 x i32> [[TMP14]], [[TMP20]]
; CHECK-NEXT: [[TMP15:%.*]] = add <16 x i32> [[TMP14]], [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP16]])
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP17]], 16

View File

@ -6,11 +6,11 @@ define i1 @foo() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TOBOOL_NOT_NOT509_I_2329_I_I:%.*]] = icmp ne i32 0, 0
; CHECK-NEXT: [[STOREMERGE_2333_I_I:%.*]] = select i1 [[TOBOOL_NOT_NOT509_I_2329_I_I]], i32 0, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[STOREMERGE_2333_I_I]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> zeroinitializer, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> <i1 false, i1 false, i1 undef, i1 undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 undef, i1 undef>, <4 x i1> [[TMP6]], i64 4)
; CHECK-NEXT: [[TOBOOL_NOT_NOT509_I_1_2_I_I:%.*]] = icmp ne i32 [[STOREMERGE_2333_I_I]], 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL_NOT_NOT509_I_1_2_I_I]], i32 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i1> [[TMP0]], i1 [[TOBOOL_NOT_NOT509_I_2329_I_I]], i32 5
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP1]], <4 x i1> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> [[TMP2]], <2 x i1> zeroinitializer, i64 6)
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false

View File

@ -153,8 +153,8 @@ define float @foo3(ptr nocapture readonly %A) #0 {
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX14]], align 4
; CHECK-NEXT: [[TMP7]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX14]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 5, i32 1, i32 2, i32 poison>
@ -283,7 +283,7 @@ define void @test(ptr %i1, ptr %i2, ptr %o, i1 %arg) {
; CHECK-NEXT: [[I1_0:%.*]] = load x86_fp80, ptr [[I1:%.*]], align 16
; CHECK-NEXT: [[I1_GEP1:%.*]] = getelementptr x86_fp80, ptr [[I1]], i64 1
; CHECK-NEXT: [[I1_1:%.*]] = load x86_fp80, ptr [[I1_GEP1]], align 16
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[THEN:%.*]], label [[END:%.*]]
; CHECK-NEXT: br i1 %arg, label [[THEN:%.*]], label [[END:%.*]]
; CHECK: then:
; CHECK-NEXT: [[I2_0:%.*]] = load x86_fp80, ptr [[I2:%.*]], align 16
; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, ptr [[I2]], i64 1

View File

@ -103,10 +103,10 @@ define void @test2(ptr %p1, ptr %p2) {
; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> <double 4.000000e+00, double 4.100000e+00>, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], <double 2.000000e+00, double 2.100000e+00>
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP10]], <double 3.000000e+00, double 3.100000e+00>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15:%.*]], [[BB6:%.*]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x double> [ [[TMP11]], [[BB1]] ], [ [[TMP16:%.*]], [[BB6:%.*]] ]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[X0:%.*]] = getelementptr inbounds double, ptr [[P2:%.*]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, ptr [[X0]], align 8
; CHECK-NEXT: br i1 poison, label [[BB3:%.*]], label [[BB6]]
@ -117,7 +117,8 @@ define void @test2(ptr %p1, ptr %p2) {
; CHECK: bb5:
; CHECK-NEXT: br label [[BB6]]
; CHECK: bb6:
; CHECK-NEXT: [[TMP15]] = phi <2 x double> [ [[TMP13]], [[BB2]] ], [ [[TMP14]], [[BB4]] ], [ [[TMP14]], [[BB5]] ]
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x double> [ [[TMP13]], [[BB2]] ], [ [[TMP14]], [[BB4]] ], [ [[TMP14]], [[BB5]] ]
; CHECK-NEXT: [[TMP16]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br label [[BB2]]
;
entry:

View File

@ -11,9 +11,9 @@ define void @test() {
; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i16> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP7]], <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 1, i32 4, i32 3, i32 6>
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i16> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-4 < %s | FileCheck %s
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) {
; CHECK-LABEL: define i32 @test(
@ -13,7 +13,7 @@ define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) {
; CHECK-NEXT: br i1 false, label %[[D_EXIT_3]], label %[[D_EXIT_6:.*]]
; CHECK: [[D_EXIT_3]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[ENTRY]] ], [ poison, %[[IF_END_I_1]] ]
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 0, i32 0>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL_I_4]], label %[[D_EXIT_4:.*]], label %[[D_EXIT_6]]
; CHECK: [[D_EXIT_4]]:
; CHECK-NEXT: br label %[[D_EXIT_6]]
@ -21,29 +21,25 @@ define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) {
; CHECK-NEXT: br i1 false, label %[[D_EXIT_6]], label %[[D_EXIT_7:.*]]
; CHECK: [[D_EXIT_6]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP1]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ [[TMP1]], %[[D_EXIT_4]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP2]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[D_EXIT_4]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_5]] ], [ zeroinitializer, %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[D_EXIT_4]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP2]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[D_EXIT_4]] ]
; CHECK-NEXT: br label %[[D_EXIT_7]]
; CHECK: [[D_EXIT_7]]:
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP3]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP4]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP8]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP3]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ [[TMP4]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> <i32 1, i32 poison, i32 poison, i32 1, i32 poison, i32 1, i32 1, i32 poison>, <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 poison, i32 13, i32 14, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP0]], i32 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[RETVAL_0_I_219]], i32 7
; CHECK-NEXT: [[TMP12:%.*]] = add <8 x i32> [[TMP11]], [[TMP7]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 1, i32 poison>, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[RETVAL_0_I_219]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 1, i32 1>, i32 [[RETVAL_0_I_219]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP22:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP21]], <4 x i32> [[TMP10]], i64 4)
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 poison>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> <i32 poison, i32 poison, i32 1, i32 1, i32 1, i32 poison, i32 poison, i32 1>, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 5, i32 6, i32 15>
; CHECK-NEXT: [[TMP19:%.*]] = add <8 x i32> [[TMP18]], [[TMP22]]
; CHECK-NEXT: [[TMP20:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP19]], i64 0)
; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP20]], [[TMP16]]
; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP19]], <4 x i32> [[RDX_OP]], i64 0)
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP12]])
; CHECK-NEXT: ret i32 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP16]])
; CHECK-NEXT: [[OP_RDX4:%.*]] = or i32 [[TMP18]], [[TMP17]]
; CHECK-NEXT: ret i32 [[OP_RDX4]]
;
entry:
%0 = load i32, ptr %f, align 4

View File

@ -7,14 +7,20 @@ define void @test(i1 %c, ptr %arg) {
; CHECK: if:
; CHECK-NEXT: [[ARG2_2:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 24
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARG]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARG2_2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP1]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP5]], <2 x i64> [[TMP2]], i64 2)
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: else:
; CHECK-NEXT: [[ARG_2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 24
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARG]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARG_2]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP7]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> [[TMP10]], i64 0)
; CHECK-NEXT: [[TMP12:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP11]], <2 x i64> [[TMP8]], i64 2)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i64> [ [[TMP6]], [[IF]] ], [ [[TMP12]], [[ELSE]] ]

View File

@ -6,19 +6,23 @@ define i32 @a() {
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP17:%.*]], %[[BB1]] ]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT: [[TMP6]] = load <4 x i8>, ptr null, align 4
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i8> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP10]], <4 x i8> [[TMP6]], i64 4)
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP18]], <8 x i32> <i32 1, i32 2, i32 3, i32 12, i32 3, i32 12, i32 13, i32 14>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> [[TMP11]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP19]], <8 x i8> [[TMP18]], <8 x i32> <i32 1, i32 3, i32 2, i32 9, i32 3, i32 11, i32 9, i32 13>
; CHECK-NEXT: [[TMP22:%.*]] = xor <8 x i8> [[TMP18]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i8> [[TMP22]], [[TMP5]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[TMP23]], <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK-NEXT: store <8 x i8> [[TMP13]], ptr null, align 4
; CHECK-NEXT: store <8 x i8> [[TMP23]], ptr null, align 4
; CHECK-NEXT: [[TMP17]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: br label %[[BB1]]
;
br label %1

View File

@ -35,8 +35,8 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]

View File

@ -35,8 +35,8 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]

View File

@ -29,8 +29,8 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]

View File

@ -387,10 +387,14 @@ define void @reorder_alt_rightsubTree(ptr nocapture %c, ptr noalias nocapture re
define void @vec_shuff_reorder() #0 {
; CHECK-LABEL: @vec_shuff_reorder(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @fb, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @fa, align 4
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr @fa, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr @fb, align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP1]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP7]], <2 x float> [[TMP4]], i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fsub <4 x float> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>