mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-24 06:26:07 +00:00
AMDGPU: Migrate more tests away from undef (#131314)
andorbitset.ll is interesting since it directly depends on the difference between poison and undef. Not sure it's useful to keep the version using poison; I assume none of this code makes it to codegen. si-spill-cf.ll was also a nasty case, which I doubt has been reproducing its original issue for a very long time. I had to reclaim an older version, replace some of the poison uses, and run simplify-cfg. There's a very slight change in the final CFG with this, but the final output is approximately the same as it used to be.
This commit is contained in:
parent
d9110858ee
commit
8cc6c2e80f
@ -513,115 +513,117 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
|
||||
; GFX908-LABEL: introduced_copy_to_sgpr:
|
||||
; GFX908: ; %bb.0: ; %bb
|
||||
; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
|
||||
; GFX908-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
|
||||
; GFX908-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
|
||||
; GFX908-NEXT: s_load_dword s7, s[8:9], 0x18
|
||||
; GFX908-NEXT: s_mov_b32 s6, 0
|
||||
; GFX908-NEXT: s_mov_b32 s9, s6
|
||||
; GFX908-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
|
||||
; GFX908-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
|
||||
; GFX908-NEXT: s_load_dword s0, s[8:9], 0x18
|
||||
; GFX908-NEXT: s_mov_b32 s12, 0
|
||||
; GFX908-NEXT: s_mov_b32 s9, s12
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3
|
||||
; GFX908-NEXT: s_sub_i32 s8, 0, s3
|
||||
; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s7
|
||||
; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s7
|
||||
; GFX908-NEXT: s_sub_i32 s1, 0, s7
|
||||
; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s0
|
||||
; GFX908-NEXT: v_mov_b32_e32 v19, 0
|
||||
; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
|
||||
; GFX908-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
|
||||
; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s10, v2
|
||||
; GFX908-NEXT: s_mul_i32 s8, s8, s10
|
||||
; GFX908-NEXT: s_mul_hi_u32 s8, s10, s8
|
||||
; GFX908-NEXT: s_add_i32 s10, s10, s8
|
||||
; GFX908-NEXT: s_mul_hi_u32 s8, s2, s10
|
||||
; GFX908-NEXT: s_mul_i32 s10, s8, s3
|
||||
; GFX908-NEXT: s_sub_i32 s2, s2, s10
|
||||
; GFX908-NEXT: s_add_i32 s11, s8, 1
|
||||
; GFX908-NEXT: s_sub_i32 s10, s2, s3
|
||||
; GFX908-NEXT: s_cmp_ge_u32 s2, s3
|
||||
; GFX908-NEXT: s_cselect_b32 s8, s11, s8
|
||||
; GFX908-NEXT: s_cselect_b32 s2, s10, s2
|
||||
; GFX908-NEXT: s_add_i32 s10, s8, 1
|
||||
; GFX908-NEXT: s_cmp_ge_u32 s2, s3
|
||||
; GFX908-NEXT: s_cselect_b32 s8, s10, s8
|
||||
; GFX908-NEXT: s_lshr_b32 s7, s7, 16
|
||||
; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
|
||||
; GFX908-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
|
||||
; GFX908-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
|
||||
; GFX908-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
|
||||
; GFX908-NEXT: s_or_b32 s10, s10, 28
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s2, v2
|
||||
; GFX908-NEXT: s_mul_i32 s1, s1, s2
|
||||
; GFX908-NEXT: s_mul_hi_u32 s1, s2, s1
|
||||
; GFX908-NEXT: s_add_i32 s2, s2, s1
|
||||
; GFX908-NEXT: s_mul_hi_u32 s1, s6, s2
|
||||
; GFX908-NEXT: s_mul_i32 s2, s1, s7
|
||||
; GFX908-NEXT: s_sub_i32 s2, s6, s2
|
||||
; GFX908-NEXT: s_add_i32 s3, s1, 1
|
||||
; GFX908-NEXT: s_sub_i32 s6, s2, s7
|
||||
; GFX908-NEXT: s_cmp_ge_u32 s2, s7
|
||||
; GFX908-NEXT: s_cselect_b32 s1, s3, s1
|
||||
; GFX908-NEXT: s_cselect_b32 s2, s6, s2
|
||||
; GFX908-NEXT: s_add_i32 s3, s1, 1
|
||||
; GFX908-NEXT: s_cmp_ge_u32 s2, s7
|
||||
; GFX908-NEXT: s_cselect_b32 s8, s3, s1
|
||||
; GFX908-NEXT: s_lshr_b32 s2, s0, 16
|
||||
; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s2
|
||||
; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
|
||||
; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
|
||||
; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
|
||||
; GFX908-NEXT: s_or_b32 s14, s14, 28
|
||||
; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s7, v16
|
||||
; GFX908-NEXT: s_and_b32 s7, 0xffff, s7
|
||||
; GFX908-NEXT: s_mul_i32 s1, s1, s7
|
||||
; GFX908-NEXT: s_mul_hi_u32 s9, s0, s7
|
||||
; GFX908-NEXT: s_mul_i32 s0, s0, s7
|
||||
; GFX908-NEXT: s_add_i32 s1, s9, s1
|
||||
; GFX908-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s2, v16
|
||||
; GFX908-NEXT: s_and_b32 s2, 0xffff, s2
|
||||
; GFX908-NEXT: s_mul_i32 s3, s5, s2
|
||||
; GFX908-NEXT: s_mul_hi_u32 s5, s4, s2
|
||||
; GFX908-NEXT: s_mul_i32 s2, s4, s2
|
||||
; GFX908-NEXT: s_add_i32 s3, s5, s3
|
||||
; GFX908-NEXT: s_lshl_b64 s[4:5], s[2:3], 5
|
||||
; GFX908-NEXT: s_branch .LBB3_2
|
||||
; GFX908-NEXT: .LBB3_1: ; %Flow20
|
||||
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[0:1]
|
||||
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[2:3]
|
||||
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
|
||||
; GFX908-NEXT: .LBB3_2: ; %bb9
|
||||
; GFX908-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GFX908-NEXT: ; Child Loop BB3_5 Depth 2
|
||||
; GFX908-NEXT: s_mov_b64 s[16:17], -1
|
||||
; GFX908-NEXT: s_cbranch_scc0 .LBB3_10
|
||||
; GFX908-NEXT: s_mov_b64 s[18:19], -1
|
||||
; GFX908-NEXT: s_mov_b64 vcc, s[0:1]
|
||||
; GFX908-NEXT: s_cbranch_vccz .LBB3_10
|
||||
; GFX908-NEXT: ; %bb.3: ; %bb14
|
||||
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
|
||||
; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
|
||||
; GFX908-NEXT: s_mov_b32 s7, s6
|
||||
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
|
||||
; GFX908-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6
|
||||
; GFX908-NEXT: v_mov_b32_e32 v6, s6
|
||||
; GFX908-NEXT: v_mov_b32_e32 v9, s7
|
||||
; GFX908-NEXT: v_mov_b32_e32 v5, s7
|
||||
; GFX908-NEXT: v_mov_b32_e32 v7, s7
|
||||
; GFX908-NEXT: v_mov_b32_e32 v8, s6
|
||||
; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
|
||||
; GFX908-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
|
||||
; GFX908-NEXT: s_mov_b32 s13, s12
|
||||
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[2:3]
|
||||
; GFX908-NEXT: v_mov_b32_e32 v4, s12
|
||||
; GFX908-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v6
|
||||
; GFX908-NEXT: v_mov_b32_e32 v6, s12
|
||||
; GFX908-NEXT: v_mov_b32_e32 v8, s12
|
||||
; GFX908-NEXT: v_mov_b32_e32 v5, s13
|
||||
; GFX908-NEXT: v_mov_b32_e32 v7, s13
|
||||
; GFX908-NEXT: v_mov_b32_e32 v9, s13
|
||||
; GFX908-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
|
||||
; GFX908-NEXT: v_mov_b32_e32 v11, v5
|
||||
; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11]
|
||||
; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
|
||||
; GFX908-NEXT: v_mov_b32_e32 v10, v4
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s7, v2
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s9, v3
|
||||
; GFX908-NEXT: s_add_u32 s7, s7, 1
|
||||
; GFX908-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX908-NEXT: s_mul_hi_u32 s20, s2, s7
|
||||
; GFX908-NEXT: s_mul_i32 s9, s2, s9
|
||||
; GFX908-NEXT: s_mul_i32 s21, s3, s7
|
||||
; GFX908-NEXT: s_add_i32 s9, s20, s9
|
||||
; GFX908-NEXT: s_mul_i32 s7, s2, s7
|
||||
; GFX908-NEXT: s_add_i32 s9, s9, s21
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s9, v2
|
||||
; GFX908-NEXT: v_readfirstlane_b32 s13, v3
|
||||
; GFX908-NEXT: s_add_u32 s9, s9, 1
|
||||
; GFX908-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX908-NEXT: s_mul_hi_u32 s22, s6, s9
|
||||
; GFX908-NEXT: s_mul_i32 s13, s6, s13
|
||||
; GFX908-NEXT: s_mul_i32 s23, s7, s9
|
||||
; GFX908-NEXT: s_add_i32 s13, s22, s13
|
||||
; GFX908-NEXT: s_mul_i32 s9, s6, s9
|
||||
; GFX908-NEXT: s_add_i32 s13, s13, s23
|
||||
; GFX908-NEXT: s_branch .LBB3_5
|
||||
; GFX908-NEXT: .LBB3_4: ; %bb58
|
||||
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
|
||||
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GFX908-NEXT: s_add_u32 s18, s18, s14
|
||||
; GFX908-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[2:3]
|
||||
; GFX908-NEXT: s_addc_u32 s19, s19, s15
|
||||
; GFX908-NEXT: s_mov_b64 s[20:21], 0
|
||||
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
|
||||
; GFX908-NEXT: s_add_u32 s20, s20, s4
|
||||
; GFX908-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[2:3]
|
||||
; GFX908-NEXT: s_addc_u32 s21, s21, s5
|
||||
; GFX908-NEXT: s_mov_b64 s[22:23], 0
|
||||
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[24:25]
|
||||
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
|
||||
; GFX908-NEXT: .LBB3_5: ; %bb16
|
||||
; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
|
||||
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX908-NEXT: s_add_u32 s20, s18, s7
|
||||
; GFX908-NEXT: s_addc_u32 s21, s19, s9
|
||||
; GFX908-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
|
||||
; GFX908-NEXT: s_add_u32 s22, s20, s9
|
||||
; GFX908-NEXT: s_addc_u32 s23, s21, s13
|
||||
; GFX908-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
|
||||
; GFX908-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: global_load_dword v12, v19, s[20:21] offset:-4 glc
|
||||
; GFX908-NEXT: global_load_dword v12, v19, s[22:23] offset:-4 glc
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: global_load_dword v12, v19, s[20:21] glc
|
||||
; GFX908-NEXT: global_load_dword v12, v19, s[22:23] glc
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: ds_read_b64 v[12:13], v19
|
||||
; GFX908-NEXT: ds_read_b64 v[14:15], v0
|
||||
; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; GFX908-NEXT: s_and_b64 vcc, exec, s[2:3]
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
|
||||
; GFX908-NEXT: ; %bb.6: ; %bb51
|
||||
@ -648,28 +650,28 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
|
||||
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
|
||||
; GFX908-NEXT: s_branch .LBB3_4
|
||||
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
|
||||
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
|
||||
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
|
||||
; GFX908-NEXT: s_mov_b64 s[22:23], s[18:19]
|
||||
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
|
||||
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
|
||||
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX908-NEXT: s_mov_b64 s[20:21], -1
|
||||
; GFX908-NEXT: s_mov_b64 s[22:23], -1
|
||||
; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; GFX908-NEXT: ; implicit-def: $sgpr18_sgpr19
|
||||
; GFX908-NEXT: ; implicit-def: $sgpr20_sgpr21
|
||||
; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
|
||||
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX908-NEXT: s_xor_b64 s[16:17], s[20:21], -1
|
||||
; GFX908-NEXT: s_xor_b64 s[18:19], s[22:23], -1
|
||||
; GFX908-NEXT: .LBB3_10: ; %Flow19
|
||||
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX908-NEXT: s_mov_b64 s[0:1], -1
|
||||
; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17]
|
||||
; GFX908-NEXT: s_mov_b64 s[2:3], -1
|
||||
; GFX908-NEXT: s_and_b64 vcc, exec, s[18:19]
|
||||
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
|
||||
; GFX908-NEXT: ; %bb.11: ; %bb12
|
||||
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX908-NEXT: s_add_u32 s4, s4, s8
|
||||
; GFX908-NEXT: s_addc_u32 s5, s5, 0
|
||||
; GFX908-NEXT: s_add_u32 s10, s10, s12
|
||||
; GFX908-NEXT: s_addc_u32 s11, s11, s13
|
||||
; GFX908-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX908-NEXT: s_add_u32 s10, s10, s8
|
||||
; GFX908-NEXT: s_addc_u32 s11, s11, 0
|
||||
; GFX908-NEXT: s_add_u32 s14, s14, s16
|
||||
; GFX908-NEXT: s_addc_u32 s15, s15, s17
|
||||
; GFX908-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX908-NEXT: s_branch .LBB3_1
|
||||
; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
|
||||
; GFX908-NEXT: s_endpgm
|
||||
@ -677,111 +679,113 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
|
||||
; GFX90A-LABEL: introduced_copy_to_sgpr:
|
||||
; GFX90A: ; %bb.0: ; %bb
|
||||
; GFX90A-NEXT: global_load_ushort v18, v[0:1], off glc
|
||||
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
|
||||
; GFX90A-NEXT: s_load_dword s7, s[8:9], 0x18
|
||||
; GFX90A-NEXT: s_mov_b32 s6, 0
|
||||
; GFX90A-NEXT: s_mov_b32 s9, s6
|
||||
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
|
||||
; GFX90A-NEXT: s_load_dword s0, s[8:9], 0x18
|
||||
; GFX90A-NEXT: s_mov_b32 s12, 0
|
||||
; GFX90A-NEXT: s_mov_b32 s9, s12
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3
|
||||
; GFX90A-NEXT: s_sub_i32 s8, 0, s3
|
||||
; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s7
|
||||
; GFX90A-NEXT: s_sub_i32 s1, 0, s7
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v19, 0
|
||||
; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
|
||||
; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
|
||||
; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v2
|
||||
; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s7
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s10, v3
|
||||
; GFX90A-NEXT: s_mul_i32 s8, s8, s10
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s8, s10, s8
|
||||
; GFX90A-NEXT: s_add_i32 s10, s10, s8
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s8, s2, s10
|
||||
; GFX90A-NEXT: s_mul_i32 s10, s8, s3
|
||||
; GFX90A-NEXT: s_sub_i32 s2, s2, s10
|
||||
; GFX90A-NEXT: s_add_i32 s11, s8, 1
|
||||
; GFX90A-NEXT: s_sub_i32 s10, s2, s3
|
||||
; GFX90A-NEXT: s_cmp_ge_u32 s2, s3
|
||||
; GFX90A-NEXT: s_cselect_b32 s8, s11, s8
|
||||
; GFX90A-NEXT: s_cselect_b32 s2, s10, s2
|
||||
; GFX90A-NEXT: s_add_i32 s10, s8, 1
|
||||
; GFX90A-NEXT: s_cmp_ge_u32 s2, s3
|
||||
; GFX90A-NEXT: s_cselect_b32 s8, s10, s8
|
||||
; GFX90A-NEXT: s_lshr_b32 s7, s7, 16
|
||||
; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s7
|
||||
; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
|
||||
; GFX90A-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
|
||||
; GFX90A-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
|
||||
; GFX90A-NEXT: s_or_b32 s10, s10, 28
|
||||
; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s0
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s2, v3
|
||||
; GFX90A-NEXT: s_mul_i32 s1, s1, s2
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s1, s2, s1
|
||||
; GFX90A-NEXT: s_add_i32 s2, s2, s1
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s1, s6, s2
|
||||
; GFX90A-NEXT: s_mul_i32 s2, s1, s7
|
||||
; GFX90A-NEXT: s_sub_i32 s2, s6, s2
|
||||
; GFX90A-NEXT: s_add_i32 s3, s1, 1
|
||||
; GFX90A-NEXT: s_sub_i32 s6, s2, s7
|
||||
; GFX90A-NEXT: s_cmp_ge_u32 s2, s7
|
||||
; GFX90A-NEXT: s_cselect_b32 s1, s3, s1
|
||||
; GFX90A-NEXT: s_cselect_b32 s2, s6, s2
|
||||
; GFX90A-NEXT: s_add_i32 s3, s1, 1
|
||||
; GFX90A-NEXT: s_cmp_ge_u32 s2, s7
|
||||
; GFX90A-NEXT: s_cselect_b32 s8, s3, s1
|
||||
; GFX90A-NEXT: s_lshr_b32 s2, s0, 16
|
||||
; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s2
|
||||
; GFX90A-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
|
||||
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
|
||||
; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
|
||||
; GFX90A-NEXT: s_or_b32 s14, s14, 28
|
||||
; GFX90A-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s7, v18
|
||||
; GFX90A-NEXT: s_and_b32 s7, 0xffff, s7
|
||||
; GFX90A-NEXT: s_mul_i32 s1, s1, s7
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s9, s0, s7
|
||||
; GFX90A-NEXT: s_mul_i32 s0, s0, s7
|
||||
; GFX90A-NEXT: s_add_i32 s1, s9, s1
|
||||
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s2, v18
|
||||
; GFX90A-NEXT: s_and_b32 s2, 0xffff, s2
|
||||
; GFX90A-NEXT: s_mul_i32 s3, s5, s2
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s5, s4, s2
|
||||
; GFX90A-NEXT: s_mul_i32 s2, s4, s2
|
||||
; GFX90A-NEXT: s_add_i32 s3, s5, s3
|
||||
; GFX90A-NEXT: s_lshl_b64 s[4:5], s[2:3], 5
|
||||
; GFX90A-NEXT: s_branch .LBB3_2
|
||||
; GFX90A-NEXT: .LBB3_1: ; %Flow20
|
||||
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[0:1]
|
||||
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_vccz .LBB3_12
|
||||
; GFX90A-NEXT: .LBB3_2: ; %bb9
|
||||
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
|
||||
; GFX90A-NEXT: ; Child Loop BB3_5 Depth 2
|
||||
; GFX90A-NEXT: s_mov_b64 s[16:17], -1
|
||||
; GFX90A-NEXT: s_cbranch_scc0 .LBB3_10
|
||||
; GFX90A-NEXT: s_mov_b64 s[18:19], -1
|
||||
; GFX90A-NEXT: s_mov_b64 vcc, s[0:1]
|
||||
; GFX90A-NEXT: s_cbranch_vccz .LBB3_10
|
||||
; GFX90A-NEXT: ; %bb.3: ; %bb14
|
||||
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
|
||||
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
|
||||
; GFX90A-NEXT: s_mov_b32 s7, s6
|
||||
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[6:7], s[6:7] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
|
||||
; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11]
|
||||
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
|
||||
; GFX90A-NEXT: s_mov_b32 s13, s12
|
||||
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[12:13], s[12:13] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v8
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[12:13], s[12:13] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[12:13], s[12:13] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
|
||||
; GFX90A-NEXT: s_mov_b64 s[20:21], s[14:15]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s7, v4
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s9, v5
|
||||
; GFX90A-NEXT: s_add_u32 s7, s7, 1
|
||||
; GFX90A-NEXT: s_addc_u32 s9, s9, 0
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s20, s2, s7
|
||||
; GFX90A-NEXT: s_mul_i32 s9, s2, s9
|
||||
; GFX90A-NEXT: s_mul_i32 s21, s3, s7
|
||||
; GFX90A-NEXT: s_add_i32 s9, s20, s9
|
||||
; GFX90A-NEXT: s_mul_i32 s7, s2, s7
|
||||
; GFX90A-NEXT: s_add_i32 s9, s9, s21
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s9, v4
|
||||
; GFX90A-NEXT: v_readfirstlane_b32 s13, v5
|
||||
; GFX90A-NEXT: s_add_u32 s9, s9, 1
|
||||
; GFX90A-NEXT: s_addc_u32 s13, s13, 0
|
||||
; GFX90A-NEXT: s_mul_hi_u32 s22, s6, s9
|
||||
; GFX90A-NEXT: s_mul_i32 s13, s6, s13
|
||||
; GFX90A-NEXT: s_mul_i32 s23, s7, s9
|
||||
; GFX90A-NEXT: s_add_i32 s13, s22, s13
|
||||
; GFX90A-NEXT: s_mul_i32 s9, s6, s9
|
||||
; GFX90A-NEXT: s_add_i32 s13, s13, s23
|
||||
; GFX90A-NEXT: s_branch .LBB3_5
|
||||
; GFX90A-NEXT: .LBB3_4: ; %bb58
|
||||
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
|
||||
; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
|
||||
; GFX90A-NEXT: s_add_u32 s18, s18, s14
|
||||
; GFX90A-NEXT: s_addc_u32 s19, s19, s15
|
||||
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[4:5]
|
||||
; GFX90A-NEXT: s_mov_b64 s[20:21], 0
|
||||
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
|
||||
; GFX90A-NEXT: s_add_u32 s20, s20, s4
|
||||
; GFX90A-NEXT: s_addc_u32 s21, s21, s5
|
||||
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[4:5]
|
||||
; GFX90A-NEXT: s_mov_b64 s[22:23], 0
|
||||
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[24:25]
|
||||
; GFX90A-NEXT: s_cbranch_vccz .LBB3_9
|
||||
; GFX90A-NEXT: .LBB3_5: ; %bb16
|
||||
; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1
|
||||
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; GFX90A-NEXT: s_add_u32 s20, s18, s7
|
||||
; GFX90A-NEXT: s_addc_u32 s21, s19, s9
|
||||
; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
|
||||
; GFX90A-NEXT: s_add_u32 s22, s20, s9
|
||||
; GFX90A-NEXT: s_addc_u32 s23, s21, s13
|
||||
; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
|
||||
; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] offset:-4 glc
|
||||
; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:-4 glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] glc
|
||||
; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
|
||||
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
|
||||
; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
|
||||
; GFX90A-NEXT: s_and_b64 vcc, exec, s[2:3]
|
||||
; GFX90A-NEXT: ; kill: killed $sgpr22 killed $sgpr23
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
|
||||
; GFX90A-NEXT: ; %bb.6: ; %bb51
|
||||
@ -800,28 +804,28 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
|
||||
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
|
||||
; GFX90A-NEXT: s_branch .LBB3_4
|
||||
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
|
||||
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
|
||||
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
|
||||
; GFX90A-NEXT: s_mov_b64 s[22:23], s[18:19]
|
||||
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
|
||||
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
|
||||
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
|
||||
; GFX90A-NEXT: s_mov_b64 s[22:23], -1
|
||||
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; GFX90A-NEXT: ; implicit-def: $sgpr18_sgpr19
|
||||
; GFX90A-NEXT: ; implicit-def: $sgpr20_sgpr21
|
||||
; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
|
||||
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX90A-NEXT: s_xor_b64 s[16:17], s[20:21], -1
|
||||
; GFX90A-NEXT: s_xor_b64 s[18:19], s[22:23], -1
|
||||
; GFX90A-NEXT: .LBB3_10: ; %Flow19
|
||||
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX90A-NEXT: s_mov_b64 s[0:1], -1
|
||||
; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17]
|
||||
; GFX90A-NEXT: s_mov_b64 s[2:3], -1
|
||||
; GFX90A-NEXT: s_and_b64 vcc, exec, s[18:19]
|
||||
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
|
||||
; GFX90A-NEXT: ; %bb.11: ; %bb12
|
||||
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX90A-NEXT: s_add_u32 s4, s4, s8
|
||||
; GFX90A-NEXT: s_addc_u32 s5, s5, 0
|
||||
; GFX90A-NEXT: s_add_u32 s10, s10, s12
|
||||
; GFX90A-NEXT: s_addc_u32 s11, s11, s13
|
||||
; GFX90A-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX90A-NEXT: s_add_u32 s10, s10, s8
|
||||
; GFX90A-NEXT: s_addc_u32 s11, s11, 0
|
||||
; GFX90A-NEXT: s_add_u32 s14, s14, s16
|
||||
; GFX90A-NEXT: s_addc_u32 s15, s15, s17
|
||||
; GFX90A-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX90A-NEXT: s_branch .LBB3_1
|
||||
; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
@ -834,7 +838,8 @@ bb:
|
||||
|
||||
bb9: ; preds = %bb12, %bb
|
||||
%i10 = phi i64 [ %arg3, %bb ], [ %i13, %bb12 ]
|
||||
br i1 undef, label %bb14, label %bb12
|
||||
%undef = freeze i1 poison
|
||||
br i1 %undef, label %bb14, label %bb12
|
||||
|
||||
bb12: ; preds = %bb58, %bb9
|
||||
%i13 = add nuw nsw i64 %i10, %i8
|
||||
|
@ -55,7 +55,7 @@ define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) {
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x594
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%select = select i1 %cond, <2 x i32> <i32 5, i32 undef>, <2 x i32> <i32 6, i32 7>
|
||||
%select = select i1 %cond, <2 x i32> <i32 5, i32 poison>, <2 x i32> <i32 6, i32 7>
|
||||
%op = sdiv <2 x i32> <i32 3333, i32 9999>, %select
|
||||
ret <2 x i32> %op
|
||||
}
|
||||
|
@ -106,8 +106,8 @@ define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
|
||||
@gv = external addrspace(1) global i32
|
||||
|
||||
; Make sure there's no verifier error with an undef source.
|
||||
define void @bitset_verifier_error() local_unnamed_addr #0 {
|
||||
; SI-LABEL: bitset_verifier_error:
|
||||
define void @bitset_verifier_error_freeze_poison() local_unnamed_addr #0 {
|
||||
; SI-LABEL: bitset_verifier_error_freeze_poison:
|
||||
; SI: ; %bb.0: ; %bb
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: s_getpc_b64 s[4:5]
|
||||
@ -128,13 +128,40 @@ define void @bitset_verifier_error() local_unnamed_addr #0 {
|
||||
; SI-NEXT: ; %bb.1: ; %bb5
|
||||
; SI-NEXT: .LBB6_2: ; %bb6
|
||||
bb:
|
||||
%i = call float @llvm.fabs.f32(float undef) #0
|
||||
%undef0 = freeze float poison
|
||||
%i = call float @llvm.fabs.f32(float %undef0) #0
|
||||
%i1 = bitcast float %i to i32
|
||||
store i32 %i1, ptr addrspace(1) @gv
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
%i3 = call float @llvm.fabs.f32(float undef) #0
|
||||
%undef1 = freeze float poison
|
||||
%i3 = call float @llvm.fabs.f32(float %undef1) #0
|
||||
%i4 = fcmp fast ult float %i3, 0x3FEFF7CEE0000000
|
||||
br i1 %i4, label %bb5, label %bb6
|
||||
|
||||
bb5:
|
||||
unreachable
|
||||
|
||||
bb6:
|
||||
unreachable
|
||||
}
|
||||
|
||||
define void @bitset_verifier_error_poison() local_unnamed_addr #0 {
|
||||
; SI-LABEL: bitset_verifier_error_poison:
|
||||
; SI: ; %bb.0: ; %bb
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: s_cbranch_scc1 .LBB7_2
|
||||
; SI-NEXT: ; %bb.1: ; %bb5
|
||||
; SI-NEXT: .LBB7_2: ; %bb6
|
||||
bb:
|
||||
%i = call float @llvm.fabs.f32(float poison) #0
|
||||
%i1 = bitcast float %i to i32
|
||||
store i32 %i1, ptr addrspace(1) @gv
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
%i3 = call float @llvm.fabs.f32(float poison) #0
|
||||
%i4 = fcmp fast ult float %i3, 0x3FEFF7CEE0000000
|
||||
br i1 %i4, label %bb5, label %bb6
|
||||
|
||||
|
@ -37,11 +37,12 @@ bb2:
|
||||
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
|
||||
bb0:
|
||||
%tmp = icmp sgt i32 %arg1, 4
|
||||
%undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef)
|
||||
%tmp4 = select i1 %undef, float %arg, float 1.000000e+00
|
||||
%mask = freeze i32 poison
|
||||
%undef0 = call i1 @llvm.amdgcn.class.f32(float poison, i32 %mask)
|
||||
%tmp4 = select i1 %undef0, float %arg, float 1.000000e+00
|
||||
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
|
||||
%tmp6 = fcmp olt float %arg2, 1.000000e+00
|
||||
%tmp7 = fcmp olt float %arg, undef
|
||||
%tmp7 = fcmp olt float %arg, poison
|
||||
%tmp8 = and i1 %tmp5, %tmp6
|
||||
%tmp9 = and i1 %tmp8, %tmp7
|
||||
br i1 %tmp9, label %bb1, label %bb2
|
||||
|
@ -4,29 +4,34 @@
|
||||
|
||||
; Test that unused lanes in the s_xor result are masked out with v_cndmask.
|
||||
|
||||
define i32 @combine_add_zext_xor() {
|
||||
define i32 @combine_add_zext_xor(i32 inreg %cond) {
|
||||
; GFX1010-LABEL: combine_add_zext_xor:
|
||||
; GFX1010: ; %bb.0: ; %.entry
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
|
||||
; GFX1010-NEXT: s_branch .LBB0_2
|
||||
; GFX1010-NEXT: .LBB0_1: ; %bb9
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GFX1010-NEXT: s_xor_b32 s4, s4, -1
|
||||
; GFX1010-NEXT: s_xor_b32 s5, s5, -1
|
||||
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
|
||||
; GFX1010-NEXT: v_add_nc_u32_e32 v2, v1, v0
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX1010-NEXT: s_cbranch_vccz .LBB0_4
|
||||
; GFX1010-NEXT: .LBB0_2: ; %.a
|
||||
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr4
|
||||
; GFX1010-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr5
|
||||
; GFX1010-NEXT: s_cbranch_vccnz .LBB0_1
|
||||
; GFX1010-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
|
||||
; GFX1010-NEXT: s_branch .LBB0_1
|
||||
; GFX1010-NEXT: .LBB0_4: ; %.exit
|
||||
; GFX1010-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -34,27 +39,32 @@ define i32 @combine_add_zext_xor() {
|
||||
; GFX1100-LABEL: combine_add_zext_xor:
|
||||
; GFX1100: ; %bb.0: ; %.entry
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
|
||||
; GFX1100-NEXT: s_branch .LBB0_2
|
||||
; GFX1100-NEXT: .LBB0_1: ; %bb9
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GFX1100-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: s_xor_b32 s1, s1, -1
|
||||
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_add_nc_u32_e32 v2, v1, v0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX1100-NEXT: s_cbranch_vccz .LBB0_4
|
||||
; GFX1100-NEXT: .LBB0_2: ; %.a
|
||||
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr0
|
||||
; GFX1100-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr1
|
||||
; GFX1100-NEXT: s_cbranch_vccnz .LBB0_1
|
||||
; GFX1100-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
|
||||
; GFX1100-NEXT: s_branch .LBB0_1
|
||||
; GFX1100-NEXT: .LBB0_4: ; %.exit
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -63,7 +73,8 @@ define i32 @combine_add_zext_xor() {
|
||||
|
||||
.a: ; preds = %bb9, %.entry
|
||||
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
|
||||
br i1 poison, label %bb9, label %bb
|
||||
%cmp = icmp eq i32 %cond, 0
|
||||
br i1 %cmp, label %bb9, label %bb
|
||||
|
||||
bb: ; preds = %.a
|
||||
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
|
||||
@ -84,29 +95,34 @@ bb9: ; preds = %bb, %.a
|
||||
|
||||
; Test that unused lanes in the s_xor result are masked out with v_cndmask.
|
||||
|
||||
define i32 @combine_sub_zext_xor() {
|
||||
define i32 @combine_sub_zext_xor(i32 inreg %cond) {
|
||||
; GFX1010-LABEL: combine_sub_zext_xor:
|
||||
; GFX1010: ; %bb.0: ; %.entry
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
|
||||
; GFX1010-NEXT: s_branch .LBB1_2
|
||||
; GFX1010-NEXT: .LBB1_1: ; %bb9
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GFX1010-NEXT: s_xor_b32 s4, s4, -1
|
||||
; GFX1010-NEXT: s_xor_b32 s5, s5, -1
|
||||
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
|
||||
; GFX1010-NEXT: v_sub_nc_u32_e32 v2, v1, v0
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX1010-NEXT: s_cbranch_vccz .LBB1_4
|
||||
; GFX1010-NEXT: .LBB1_2: ; %.a
|
||||
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr4
|
||||
; GFX1010-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr5
|
||||
; GFX1010-NEXT: s_cbranch_vccnz .LBB1_1
|
||||
; GFX1010-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
|
||||
; GFX1010-NEXT: s_branch .LBB1_1
|
||||
; GFX1010-NEXT: .LBB1_4: ; %.exit
|
||||
; GFX1010-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -114,27 +130,32 @@ define i32 @combine_sub_zext_xor() {
|
||||
; GFX1100-LABEL: combine_sub_zext_xor:
|
||||
; GFX1100: ; %bb.0: ; %.entry
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
|
||||
; GFX1100-NEXT: s_branch .LBB1_2
|
||||
; GFX1100-NEXT: .LBB1_1: ; %bb9
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GFX1100-NEXT: s_xor_b32 s0, s0, -1
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: s_xor_b32 s1, s1, -1
|
||||
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_sub_nc_u32_e32 v2, v1, v0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX1100-NEXT: s_cbranch_vccz .LBB1_4
|
||||
; GFX1100-NEXT: .LBB1_2: ; %.a
|
||||
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr0
|
||||
; GFX1100-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr1
|
||||
; GFX1100-NEXT: s_cbranch_vccnz .LBB1_1
|
||||
; GFX1100-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB1_2 Depth=1
|
||||
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
|
||||
; GFX1100-NEXT: s_branch .LBB1_1
|
||||
; GFX1100-NEXT: .LBB1_4: ; %.exit
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -143,7 +164,8 @@ define i32 @combine_sub_zext_xor() {
|
||||
|
||||
.a: ; preds = %bb9, %.entry
|
||||
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
|
||||
br i1 undef, label %bb9, label %bb
|
||||
%cmp = icmp eq i32 %cond, 0
|
||||
br i1 %cmp, label %bb9, label %bb
|
||||
|
||||
bb: ; preds = %.a
|
||||
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
|
||||
@ -164,60 +186,71 @@ bb9: ; preds = %bb, %.a
|
||||
|
||||
; Test that unused lanes in the s_or result are masked out with v_cndmask.
|
||||
|
||||
define i32 @combine_add_zext_or() {
|
||||
define i32 @combine_add_zext_or(i32 inreg %cond) {
|
||||
; GFX1010-LABEL: combine_add_zext_or:
|
||||
; GFX1010: ; %bb.0: ; %.entry
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1010-NEXT: s_mov_b32 s4, 0
|
||||
; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
|
||||
; GFX1010-NEXT: s_mov_b32 s5, 0
|
||||
; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
|
||||
; GFX1010-NEXT: s_branch .LBB2_2
|
||||
; GFX1010-NEXT: .LBB2_1: ; %bb9
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX1010-NEXT: s_cmpk_gt_i32 s4, 0xfbe6
|
||||
; GFX1010-NEXT: s_cselect_b32 s6, -1, 0
|
||||
; GFX1010-NEXT: s_add_i32 s4, s4, 1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s6
|
||||
; GFX1010-NEXT: s_cmpk_gt_i32 s5, 0xfbe6
|
||||
; GFX1010-NEXT: s_cselect_b32 s7, -1, 0
|
||||
; GFX1010-NEXT: s_add_i32 s5, s5, 1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s7
|
||||
; GFX1010-NEXT: s_cbranch_vccz .LBB2_4
|
||||
; GFX1010-NEXT: .LBB2_2: ; %.a
|
||||
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr5
|
||||
; GFX1010-NEXT: s_cbranch_scc1 .LBB2_1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr6
|
||||
; GFX1010-NEXT: s_cbranch_vccnz .LBB2_1
|
||||
; GFX1010-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX1010-NEXT: buffer_load_dword v0, v0, s[4:7], 64 offen glc
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s6, 0, v0
|
||||
; GFX1010-NEXT: s_branch .LBB2_1
|
||||
; GFX1010-NEXT: .LBB2_4: ; %.exit
|
||||
; GFX1010-NEXT: s_or_b32 s4, s5, s6
|
||||
; GFX1010-NEXT: s_or_b32 s4, s6, s7
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1100-LABEL: combine_add_zext_or:
|
||||
; GFX1100: ; %bb.0: ; %.entry
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: s_mov_b32 s0, 0
|
||||
; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX1100-NEXT: s_mov_b32 s1, 0
|
||||
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
|
||||
; GFX1100-NEXT: s_branch .LBB2_2
|
||||
; GFX1100-NEXT: .LBB2_1: ; %bb9
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX1100-NEXT: s_cmpk_gt_i32 s0, 0xfbe6
|
||||
; GFX1100-NEXT: s_cselect_b32 s2, -1, 0
|
||||
; GFX1100-NEXT: s_add_i32 s0, s0, 1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s2
|
||||
; GFX1100-NEXT: s_cmpk_gt_i32 s1, 0xfbe6
|
||||
; GFX1100-NEXT: s_cselect_b32 s3, -1, 0
|
||||
; GFX1100-NEXT: s_add_i32 s1, s1, 1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s3
|
||||
; GFX1100-NEXT: s_cbranch_vccz .LBB2_4
|
||||
; GFX1100-NEXT: .LBB2_2: ; %.a
|
||||
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr1
|
||||
; GFX1100-NEXT: s_cbranch_scc1 .LBB2_1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr2
|
||||
; GFX1100-NEXT: s_cbranch_vccnz .LBB2_1
|
||||
; GFX1100-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX1100-NEXT: buffer_load_b32 v0, v0, s[0:3], 64 offen glc
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s2, 0, v0
|
||||
; GFX1100-NEXT: s_branch .LBB2_1
|
||||
; GFX1100-NEXT: .LBB2_4: ; %.exit
|
||||
; GFX1100-NEXT: s_or_b32 s0, s1, s2
|
||||
; GFX1100-NEXT: s_or_b32 s0, s2, s3
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -226,7 +259,8 @@ define i32 @combine_add_zext_or() {
|
||||
|
||||
.a: ; preds = %bb9, %.entry
|
||||
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
|
||||
br i1 undef, label %bb9, label %bb
|
||||
%cmp = icmp eq i32 %cond, 0
|
||||
br i1 %cmp, label %bb9, label %bb
|
||||
|
||||
bb: ; preds = %.a
|
||||
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
|
||||
@ -248,60 +282,71 @@ bb9: ; preds = %bb, %.a
|
||||
|
||||
; Test that unused lanes in the s_or result are masked out with v_cndmask.
|
||||
|
||||
define i32 @combine_sub_zext_or() {
|
||||
define i32 @combine_sub_zext_or(i32 inreg %cond) {
|
||||
; GFX1010-LABEL: combine_sub_zext_or:
|
||||
; GFX1010: ; %bb.0: ; %.entry
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1010-NEXT: s_mov_b32 s4, 0
|
||||
; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
|
||||
; GFX1010-NEXT: s_mov_b32 s5, 0
|
||||
; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
|
||||
; GFX1010-NEXT: s_branch .LBB3_2
|
||||
; GFX1010-NEXT: .LBB3_1: ; %bb9
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX1010-NEXT: s_cmpk_gt_i32 s4, 0xfbe6
|
||||
; GFX1010-NEXT: s_cselect_b32 s6, -1, 0
|
||||
; GFX1010-NEXT: s_add_i32 s4, s4, -1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s6
|
||||
; GFX1010-NEXT: s_cmpk_gt_i32 s5, 0xfbe6
|
||||
; GFX1010-NEXT: s_cselect_b32 s7, -1, 0
|
||||
; GFX1010-NEXT: s_add_i32 s5, s5, -1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s7
|
||||
; GFX1010-NEXT: s_cbranch_vccz .LBB3_4
|
||||
; GFX1010-NEXT: .LBB3_2: ; %.a
|
||||
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr5
|
||||
; GFX1010-NEXT: s_cbranch_scc1 .LBB3_1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr6
|
||||
; GFX1010-NEXT: s_cbranch_vccnz .LBB3_1
|
||||
; GFX1010-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX1010-NEXT: buffer_load_dword v0, v0, s[4:7], 64 offen glc
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s6, 0, v0
|
||||
; GFX1010-NEXT: s_branch .LBB3_1
|
||||
; GFX1010-NEXT: .LBB3_4: ; %.exit
|
||||
; GFX1010-NEXT: s_or_b32 s4, s5, s6
|
||||
; GFX1010-NEXT: s_or_b32 s4, s6, s7
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX1100-LABEL: combine_sub_zext_or:
|
||||
; GFX1100: ; %bb.0: ; %.entry
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: s_mov_b32 s0, 0
|
||||
; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX1100-NEXT: s_mov_b32 s1, 0
|
||||
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
|
||||
; GFX1100-NEXT: s_branch .LBB3_2
|
||||
; GFX1100-NEXT: .LBB3_1: ; %bb9
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX1100-NEXT: s_cmpk_gt_i32 s0, 0xfbe6
|
||||
; GFX1100-NEXT: s_cselect_b32 s2, -1, 0
|
||||
; GFX1100-NEXT: s_add_i32 s0, s0, -1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s2
|
||||
; GFX1100-NEXT: s_cmpk_gt_i32 s1, 0xfbe6
|
||||
; GFX1100-NEXT: s_cselect_b32 s3, -1, 0
|
||||
; GFX1100-NEXT: s_add_i32 s1, s1, -1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s3
|
||||
; GFX1100-NEXT: s_cbranch_vccz .LBB3_4
|
||||
; GFX1100-NEXT: .LBB3_2: ; %.a
|
||||
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr1
|
||||
; GFX1100-NEXT: s_cbranch_scc1 .LBB3_1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr2
|
||||
; GFX1100-NEXT: s_cbranch_vccnz .LBB3_1
|
||||
; GFX1100-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX1100-NEXT: buffer_load_b32 v0, v0, s[0:3], 64 offen glc
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s2, 0, v0
|
||||
; GFX1100-NEXT: s_branch .LBB3_1
|
||||
; GFX1100-NEXT: .LBB3_4: ; %.exit
|
||||
; GFX1100-NEXT: s_or_b32 s0, s1, s2
|
||||
; GFX1100-NEXT: s_or_b32 s0, s2, s3
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -310,7 +355,8 @@ define i32 @combine_sub_zext_or() {
|
||||
|
||||
.a: ; preds = %bb9, %.entry
|
||||
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
|
||||
br i1 undef, label %bb9, label %bb
|
||||
%cmp = icmp eq i32 %cond, 0
|
||||
br i1 %cmp, label %bb9, label %bb
|
||||
|
||||
bb: ; preds = %.a
|
||||
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
|
||||
@ -332,28 +378,33 @@ bb9: ; preds = %bb, %.a
|
||||
|
||||
; Test that unused lanes in the s_and result are masked out with v_cndmask.
|
||||
|
||||
define i32 @combine_add_zext_and() {
|
||||
define i32 @combine_add_zext_and(i32 inreg %cond) {
|
||||
; GFX1010-LABEL: combine_add_zext_and:
|
||||
; GFX1010: ; %bb.0: ; %.entry
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
|
||||
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
|
||||
; GFX1010-NEXT: s_branch .LBB4_2
|
||||
; GFX1010-NEXT: .LBB4_1: ; %bb9
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB4_2 Depth=1
|
||||
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
|
||||
; GFX1010-NEXT: s_and_b32 s4, s4, vcc_lo
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
|
||||
; GFX1010-NEXT: s_and_b32 s5, s5, vcc_lo
|
||||
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
|
||||
; GFX1010-NEXT: v_add_nc_u32_e32 v1, v1, v0
|
||||
; GFX1010-NEXT: s_cbranch_vccz .LBB4_4
|
||||
; GFX1010-NEXT: .LBB4_2: ; %.a
|
||||
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr4
|
||||
; GFX1010-NEXT: s_cbranch_scc1 .LBB4_1
|
||||
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
|
||||
; GFX1010-NEXT: ; implicit-def: $sgpr5
|
||||
; GFX1010-NEXT: s_cbranch_vccnz .LBB4_1
|
||||
; GFX1010-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1010-NEXT: ; in Loop: Header=BB4_2 Depth=1
|
||||
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
|
||||
; GFX1010-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
|
||||
; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
|
||||
; GFX1010-NEXT: s_branch .LBB4_1
|
||||
; GFX1010-NEXT: .LBB4_4: ; %.exit
|
||||
; GFX1010-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -361,26 +412,32 @@ define i32 @combine_add_zext_and() {
|
||||
; GFX1100-LABEL: combine_add_zext_and:
|
||||
; GFX1100: ; %bb.0: ; %.entry
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
|
||||
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
|
||||
; GFX1100-NEXT: s_branch .LBB4_2
|
||||
; GFX1100-NEXT: .LBB4_1: ; %bb9
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB4_2 Depth=1
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
||||
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
|
||||
; GFX1100-NEXT: s_and_b32 s0, s0, vcc_lo
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||
; GFX1100-NEXT: s_and_b32 s1, s1, vcc_lo
|
||||
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
|
||||
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1100-NEXT: v_add_nc_u32_e32 v1, v1, v0
|
||||
; GFX1100-NEXT: s_cbranch_vccz .LBB4_4
|
||||
; GFX1100-NEXT: .LBB4_2: ; %.a
|
||||
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr0
|
||||
; GFX1100-NEXT: s_cbranch_scc1 .LBB4_1
|
||||
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
||||
; GFX1100-NEXT: ; implicit-def: $sgpr1
|
||||
; GFX1100-NEXT: s_cbranch_vccnz .LBB4_1
|
||||
; GFX1100-NEXT: ; %bb.3: ; %bb
|
||||
; GFX1100-NEXT: ; in Loop: Header=BB4_2 Depth=1
|
||||
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
|
||||
; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
|
||||
; GFX1100-NEXT: s_branch .LBB4_1
|
||||
; GFX1100-NEXT: .LBB4_4: ; %.exit
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -389,7 +446,8 @@ define i32 @combine_add_zext_and() {
|
||||
|
||||
.a: ; preds = %bb9, %.entry
|
||||
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
|
||||
br i1 undef, label %bb9, label %bb
|
||||
%cmp = icmp eq i32 %cond, 0
|
||||
br i1 %cmp, label %bb9, label %bb
|
||||
|
||||
bb: ; preds = %.a
|
||||
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
|
||||
|
@ -81,7 +81,8 @@ define float @fold_abs_in_branch_undef(float %arg1, float %arg2) {
|
||||
entry:
|
||||
%0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
|
||||
%1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
|
||||
%2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float undef)
|
||||
%undef = freeze float poison
|
||||
%2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %undef)
|
||||
%3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
|
||||
%4 = fcmp ule float %3, 1.000000e+00
|
||||
br i1 %4, label %if, label %exit
|
||||
|
@ -2,12 +2,28 @@
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SILowerI1Copies was not handling IMPLICIT_DEF
|
||||
; SI-LABEL: {{^}}br_implicit_def:
|
||||
; SI-LABEL: {{^}}br_poison:
|
||||
; SI: %bb.0:
|
||||
; SI-NEXT: s_cbranch_scc1
|
||||
define amdgpu_kernel void @br_implicit_def(ptr addrspace(1) %out, i32 %arg) #0 {
|
||||
define amdgpu_kernel void @br_poison(ptr addrspace(1) %out, i32 %arg) #0 {
|
||||
bb:
|
||||
br i1 undef, label %bb1, label %bb2
|
||||
br i1 poison, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
store volatile i32 123, ptr addrspace(1) %out
|
||||
ret void
|
||||
|
||||
bb2:
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}br_freeze_poison:
|
||||
; SI: %bb.0:
|
||||
; SI-NEXT: s_cbranch_scc1
|
||||
define amdgpu_kernel void @br_freeze_poison(ptr addrspace(1) %out, i32 %arg) #0 {
|
||||
bb:
|
||||
%undef = freeze i1 poison
|
||||
br i1 %undef, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
store volatile i32 123, ptr addrspace(1) %out
|
||||
|
@ -11,7 +11,7 @@ body: |
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
|
||||
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst (s32) from `ptr addrspace(42) undef`)
|
||||
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst (s32) from `ptr addrspace(42) poison`)
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`)
|
||||
@ -30,7 +30,7 @@ body: |
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `ptr addrspace(42) undef`)
|
||||
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `ptr addrspace(42) poison`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -47,7 +47,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
|
||||
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst (s32) on `ptr addrspace(42) undef`)
|
||||
FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst (s32) on `ptr addrspace(42) poison`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -63,7 +63,7 @@ body: |
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
|
||||
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst (s32) on `ptr addrspace(42) undef`)
|
||||
FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst (s32) on `ptr addrspace(42) poison`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
@ -19,7 +19,7 @@
|
||||
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 16, 24,
|
||||
|
||||
--- |
|
||||
@0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4
|
||||
@0 = internal unnamed_addr addrspace(3) global [256 x float] poison, align 4
|
||||
|
||||
define amdgpu_kernel void @ds_combine_base_offset() {
|
||||
bb.0:
|
||||
|
@ -689,7 +689,7 @@ divergent.ret:
|
||||
; IR: UnifiedReturnBlock:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64
|
||||
; IR-NEXT: ret void
|
||||
define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
|
||||
define amdgpu_kernel void @multi_divergent_unreachable_exit(i32 %switch) #0 {
|
||||
bb:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
switch i32 %tmp, label %bb3 [
|
||||
@ -704,7 +704,7 @@ bb2: ; preds = %bb
|
||||
unreachable
|
||||
|
||||
bb3: ; preds = %bb
|
||||
switch i32 undef, label %bb5 [
|
||||
switch i32 %switch, label %bb5 [
|
||||
i32 2, label %bb4
|
||||
]
|
||||
|
||||
|
@ -252,10 +252,10 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) captures(none
|
||||
; IR: [[BB21]]:
|
||||
; IR-NEXT: [[MY_TMP22:%.*]] = extractelement <2 x i32> [[MY_TMP17]], i64 1
|
||||
; IR-NEXT: [[MY_TMP23:%.*]] = lshr i32 [[MY_TMP22]], 16
|
||||
; IR-NEXT: [[MY_TMP24:%.*]] = select i1 undef, i32 undef, i32 [[MY_TMP23]]
|
||||
; IR-NEXT: [[MY_TMP24:%.*]] = select i1 false, i32 0, i32 [[MY_TMP23]]
|
||||
; IR-NEXT: [[MY_TMP25:%.*]] = uitofp i32 [[MY_TMP24]] to float
|
||||
; IR-NEXT: [[MY_TMP26:%.*]] = fmul float [[MY_TMP25]], 0x3EF0001000000000
|
||||
; IR-NEXT: [[MY_TMP27:%.*]] = fsub float [[MY_TMP26]], undef
|
||||
; IR-NEXT: [[MY_TMP27:%.*]] = fsub float [[MY_TMP26]], 0x7FF8000000000000
|
||||
; IR-NEXT: [[MY_TMP28:%.*]] = fcmp olt float [[MY_TMP27]], 5.000000e-01
|
||||
; IR-NEXT: [[MY_TMP29:%.*]] = select i1 [[MY_TMP28]], i64 1, i64 2
|
||||
; IR-NEXT: [[MY_TMP30:%.*]] = extractelement <4 x i32> [[MY_TMP936]], i64 [[MY_TMP29]]
|
||||
@ -317,10 +317,10 @@ bb18: ; preds = %bb18, %bb16
|
||||
bb21: ; preds = %bb18
|
||||
%my.tmp22 = extractelement <2 x i32> %my.tmp17, i64 1
|
||||
%my.tmp23 = lshr i32 %my.tmp22, 16
|
||||
%my.tmp24 = select i1 undef, i32 undef, i32 %my.tmp23
|
||||
%my.tmp24 = select i1 false, i32 0, i32 %my.tmp23
|
||||
%my.tmp25 = uitofp i32 %my.tmp24 to float
|
||||
%my.tmp26 = fmul float %my.tmp25, 0x3EF0001000000000
|
||||
%my.tmp27 = fsub float %my.tmp26, undef
|
||||
%my.tmp27 = fsub float %my.tmp26, 0x7FF8000000000000
|
||||
%my.tmp28 = fcmp olt float %my.tmp27, 5.000000e-01
|
||||
%my.tmp29 = select i1 %my.tmp28, i64 1, i64 2
|
||||
%my.tmp30 = extractelement <4 x i32> %my.tmp936, i64 %my.tmp29
|
||||
|
@ -93,7 +93,7 @@ entry:
|
||||
%conv = add i32 %i6, %i7
|
||||
%conv.frozen = freeze i32 %conv
|
||||
%div = udiv i32 %conv.frozen, 49
|
||||
%add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 undef
|
||||
%add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 0
|
||||
%in.ptr1 = getelementptr inbounds float, ptr addrspace(1) %in, i32 %i5
|
||||
br label %for.cond28.preheader
|
||||
|
||||
@ -530,11 +530,11 @@ for.cond28.preheader: ; preds = %for.cond28.preheade
|
||||
br i1 %exitcond.not, label %for.cond.cleanup26, label %for.cond28.preheader
|
||||
|
||||
for.cond.cleanup26: ; preds = %for.cond28.preheader
|
||||
%mul119 = shl nuw nsw i32 undef, 1
|
||||
%mul119 = shl nuw nsw i32 0, 1
|
||||
%mul120 = mul i32 %div, 200704
|
||||
%mul121 = mul i32 undef, 6272
|
||||
%mul121 = mul i32 0, 6272
|
||||
%add122 = add i32 %mul120, %mul121
|
||||
%mul123 = mul nuw nsw i32 undef, 28
|
||||
%mul123 = mul nuw nsw i32 0, 28
|
||||
%add124 = add i32 %add122, %mul123
|
||||
%add126 = add i32 %add124, %mul119
|
||||
%idx.ext127 = zext i32 %add126 to i64
|
||||
|
@ -2,7 +2,6 @@
|
||||
; RUN: opt -mtriple=amdgcn-- -S -passes=structurizecfg,si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=OPT %s
|
||||
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
|
||||
; OPT-LABEL: @annotate_unreachable_noloop(
|
||||
; OPT-NOT: call i1 @llvm.amdgcn.loop
|
||||
|
||||
@ -19,7 +18,7 @@ bb1: ; preds = %bb
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
|
||||
%tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
|
||||
br i1 undef, label %bb5, label %bb3
|
||||
br i1 poison, label %bb5, label %bb3
|
||||
|
||||
bb3: ; preds = %bb1
|
||||
%tmp6 = extractelement <4 x float> %tmp4, i32 2
|
||||
@ -84,7 +83,8 @@ bb1: ; preds = %bb
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
|
||||
%tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
|
||||
br i1 undef, label %bb5, label %bb3
|
||||
%undef = freeze i1 poison
|
||||
br i1 %undef, label %bb5, label %bb3
|
||||
|
||||
bb3: ; preds = %bb1
|
||||
%tmp6 = extractelement <4 x float> %tmp4, i32 2
|
||||
|
@ -7,75 +7,75 @@
|
||||
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
|
||||
; SI-NOT: v_readlane_b32 [[SAVED]]
|
||||
|
||||
define amdgpu_ps void @main() #0 {
|
||||
define amdgpu_ps void @main(<4 x i32> inreg %rsrc) #0 {
|
||||
main_body:
|
||||
%tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 16, i32 0)
|
||||
%tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 32, i32 0)
|
||||
%tmp2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 80, i32 0)
|
||||
%tmp3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 84, i32 0)
|
||||
%tmp4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 88, i32 0)
|
||||
%tmp5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 96, i32 0)
|
||||
%tmp6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 100, i32 0)
|
||||
%tmp7 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 104, i32 0)
|
||||
%tmp8 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 112, i32 0)
|
||||
%tmp9 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 116, i32 0)
|
||||
%tmp10 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 120, i32 0)
|
||||
%tmp11 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 128, i32 0)
|
||||
%tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 132, i32 0)
|
||||
%tmp13 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 136, i32 0)
|
||||
%tmp14 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 144, i32 0)
|
||||
%tmp15 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 148, i32 0)
|
||||
%tmp16 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 152, i32 0)
|
||||
%tmp17 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 160, i32 0)
|
||||
%tmp18 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 164, i32 0)
|
||||
%tmp19 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 168, i32 0)
|
||||
%tmp20 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 176, i32 0)
|
||||
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 180, i32 0)
|
||||
%tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 184, i32 0)
|
||||
%tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 192, i32 0)
|
||||
%tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 196, i32 0)
|
||||
%tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 200, i32 0)
|
||||
%tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 208, i32 0)
|
||||
%tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 212, i32 0)
|
||||
%tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 216, i32 0)
|
||||
%tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 224, i32 0)
|
||||
%tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 228, i32 0)
|
||||
%tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 232, i32 0)
|
||||
%tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 240, i32 0)
|
||||
%tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 244, i32 0)
|
||||
%tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 248, i32 0)
|
||||
%tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 256, i32 0)
|
||||
%tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 260, i32 0)
|
||||
%tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 264, i32 0)
|
||||
%tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 272, i32 0)
|
||||
%tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 276, i32 0)
|
||||
%tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 280, i32 0)
|
||||
%tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 288, i32 0)
|
||||
%tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 292, i32 0)
|
||||
%tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 296, i32 0)
|
||||
%tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 304, i32 0)
|
||||
%tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 308, i32 0)
|
||||
%tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 312, i32 0)
|
||||
%tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 320, i32 0)
|
||||
%tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 324, i32 0)
|
||||
%tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 328, i32 0)
|
||||
%tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 336, i32 0)
|
||||
%tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 340, i32 0)
|
||||
%tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 344, i32 0)
|
||||
%tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 352, i32 0)
|
||||
%tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 356, i32 0)
|
||||
%tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 360, i32 0)
|
||||
%tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 368, i32 0)
|
||||
%tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 372, i32 0)
|
||||
%tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 376, i32 0)
|
||||
%tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 384, i32 0)
|
||||
%tmp60 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 388, i32 0)
|
||||
%tmp61 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 392, i32 0)
|
||||
%tmp62 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 400, i32 0)
|
||||
%tmp63 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 404, i32 0)
|
||||
%tmp64 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 408, i32 0)
|
||||
%tmp65 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 416, i32 0)
|
||||
%tmp66 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 420, i32 0)
|
||||
%tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 16, i32 0)
|
||||
%tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 32, i32 0)
|
||||
%tmp2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 80, i32 0)
|
||||
%tmp3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 84, i32 0)
|
||||
%tmp4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 88, i32 0)
|
||||
%tmp5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 96, i32 0)
|
||||
%tmp6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 100, i32 0)
|
||||
%tmp7 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 104, i32 0)
|
||||
%tmp8 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 112, i32 0)
|
||||
%tmp9 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 116, i32 0)
|
||||
%tmp10 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 120, i32 0)
|
||||
%tmp11 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 128, i32 0)
|
||||
%tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 132, i32 0)
|
||||
%tmp13 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 136, i32 0)
|
||||
%tmp14 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 144, i32 0)
|
||||
%tmp15 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 148, i32 0)
|
||||
%tmp16 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 152, i32 0)
|
||||
%tmp17 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 160, i32 0)
|
||||
%tmp18 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 164, i32 0)
|
||||
%tmp19 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 168, i32 0)
|
||||
%tmp20 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 176, i32 0)
|
||||
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 180, i32 0)
|
||||
%tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 184, i32 0)
|
||||
%tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 192, i32 0)
|
||||
%tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 196, i32 0)
|
||||
%tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 200, i32 0)
|
||||
%tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 208, i32 0)
|
||||
%tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 212, i32 0)
|
||||
%tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 216, i32 0)
|
||||
%tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 224, i32 0)
|
||||
%tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 228, i32 0)
|
||||
%tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 232, i32 0)
|
||||
%tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 240, i32 0)
|
||||
%tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 244, i32 0)
|
||||
%tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 248, i32 0)
|
||||
%tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 256, i32 0)
|
||||
%tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 260, i32 0)
|
||||
%tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 264, i32 0)
|
||||
%tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 272, i32 0)
|
||||
%tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 276, i32 0)
|
||||
%tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 280, i32 0)
|
||||
%tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 288, i32 0)
|
||||
%tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 292, i32 0)
|
||||
%tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 296, i32 0)
|
||||
%tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 304, i32 0)
|
||||
%tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 308, i32 0)
|
||||
%tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 312, i32 0)
|
||||
%tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 320, i32 0)
|
||||
%tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 324, i32 0)
|
||||
%tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 328, i32 0)
|
||||
%tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 336, i32 0)
|
||||
%tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 340, i32 0)
|
||||
%tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 344, i32 0)
|
||||
%tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 352, i32 0)
|
||||
%tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 356, i32 0)
|
||||
%tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 360, i32 0)
|
||||
%tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 368, i32 0)
|
||||
%tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 372, i32 0)
|
||||
%tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 376, i32 0)
|
||||
%tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 384, i32 0)
|
||||
%tmp60 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 388, i32 0)
|
||||
%tmp61 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 392, i32 0)
|
||||
%tmp62 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 400, i32 0)
|
||||
%tmp63 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 404, i32 0)
|
||||
%tmp64 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 408, i32 0)
|
||||
%tmp65 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 416, i32 0)
|
||||
%tmp66 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 420, i32 0)
|
||||
br label %LOOP
|
||||
|
||||
LOOP: ; preds = %ENDIF2795, %main_body
|
||||
@ -90,7 +90,7 @@ ENDLOOP: ; preds = %ELSE2566, %LOOP
|
||||
%one.sub.ac.i = fmul float %one.sub.a.i, 0x7FF8000000000000
|
||||
%fmul = fmul float 0x7FF8000000000000, 0x7FF8000000000000
|
||||
%result.i = fadd float %fmul, %one.sub.ac.i
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float poison, float %result.i, float 0x7FF8000000000000, float 1.000000e+00, i1 true, i1 true) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float poison, float %result.i, float poison, float 1.000000e+00, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
ENDIF: ; preds = %LOOP
|
||||
@ -107,9 +107,9 @@ ENDIF: ; preds = %LOOP
|
||||
%tmp78 = call float @llvm.minnum.f32(float %tmp73, float %tmp77)
|
||||
%tmp79 = call float @llvm.maxnum.f32(float %tmp71, float 0.000000e+00)
|
||||
%tmp80 = call float @llvm.maxnum.f32(float %tmp72, float %tmp76)
|
||||
%tmp81 = call float @llvm.maxnum.f32(float poison, float %tmp78)
|
||||
%tmp81 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp78)
|
||||
%tmp82 = call float @llvm.minnum.f32(float %tmp79, float %tmp80)
|
||||
%tmp83 = call float @llvm.minnum.f32(float %tmp82, float poison)
|
||||
%tmp83 = call float @llvm.minnum.f32(float %tmp82, float 0x7FF8000000000000)
|
||||
%tmp84 = fsub float %tmp14, 0x7FF8000000000000
|
||||
%tmp85 = fsub float %tmp15, 0x7FF8000000000000
|
||||
%tmp86 = fsub float %tmp16, 0x7FF8000000000000
|
||||
@ -125,19 +125,19 @@ ENDIF: ; preds = %LOOP
|
||||
%tmp96 = call float @llvm.minnum.f32(float %tmp88, float %tmp94)
|
||||
%tmp97 = call float @llvm.maxnum.f32(float %tmp87, float %tmp93)
|
||||
%tmp98 = call float @llvm.maxnum.f32(float %tmp89, float %tmp95)
|
||||
%tmp99 = call float @llvm.maxnum.f32(float poison, float %tmp96)
|
||||
%tmp100 = call float @llvm.maxnum.f32(float %tmp99, float poison)
|
||||
%tmp101 = call float @llvm.minnum.f32(float %tmp97, float poison)
|
||||
%tmp99 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp96)
|
||||
%tmp100 = call float @llvm.maxnum.f32(float %tmp99, float 0x7FF8000000000000)
|
||||
%tmp101 = call float @llvm.minnum.f32(float %tmp97, float 0x7FF8000000000000)
|
||||
%tmp102 = call float @llvm.minnum.f32(float %tmp101, float %tmp98)
|
||||
%tmp103 = fsub float %tmp30, 0x7FF8000000000000
|
||||
%tmp104 = fsub float %tmp31, 0x7FF8000000000000
|
||||
%tmp105 = fmul float %tmp103, 0.000000e+00
|
||||
%tmp106 = fmul float %tmp104, 0.000000e+00
|
||||
%tmp107 = call float @llvm.minnum.f32(float poison, float %tmp105)
|
||||
%tmp108 = call float @llvm.maxnum.f32(float poison, float %tmp106)
|
||||
%tmp109 = call float @llvm.maxnum.f32(float poison, float %tmp107)
|
||||
%tmp110 = call float @llvm.maxnum.f32(float %tmp109, float poison)
|
||||
%tmp111 = call float @llvm.minnum.f32(float poison, float %tmp108)
|
||||
%tmp107 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp105)
|
||||
%tmp108 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp106)
|
||||
%tmp109 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp107)
|
||||
%tmp110 = call float @llvm.maxnum.f32(float %tmp109, float 0x7FF8000000000000)
|
||||
%tmp111 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp108)
|
||||
%tmp112 = fsub float %tmp32, 0x7FF8000000000000
|
||||
%tmp113 = fsub float %tmp33, 0x7FF8000000000000
|
||||
%tmp114 = fsub float %tmp34, 0x7FF8000000000000
|
||||
@ -219,18 +219,20 @@ ENDIF: ; preds = %LOOP
|
||||
%tmp190 = fmul float %tmp188, 0x7FF8000000000000
|
||||
%tmp191 = call float @llvm.maxnum.f32(float %tmp184, float %tmp189)
|
||||
%tmp192 = call float @llvm.maxnum.f32(float %tmp185, float %tmp190)
|
||||
%tmp193 = call float @llvm.maxnum.f32(float %tmp186, float poison)
|
||||
%tmp193 = call float @llvm.maxnum.f32(float %tmp186, float 0x7FF8000000000000)
|
||||
%tmp194 = call float @llvm.minnum.f32(float %tmp191, float %tmp192)
|
||||
%tmp195 = call float @llvm.minnum.f32(float %tmp194, float %tmp193)
|
||||
%.temp292.7 = select i1 undef, float %tmp162, float poison
|
||||
%undef0 = freeze i1 poison
|
||||
%.temp292.7 = select i1 %undef0, float %tmp162, float 0x7FF8000000000000
|
||||
%temp292.9 = select i1 false, float %tmp180, float %.temp292.7
|
||||
%.temp292.9 = select i1 undef, float poison, float %temp292.9
|
||||
%undef1 = freeze i1 poison
|
||||
%.temp292.9 = select i1 %undef1, float 0x7FF8000000000000, float %temp292.9
|
||||
%tmp196 = fcmp ogt float 0x7FF8000000000000, 0.000000e+00
|
||||
%tmp197 = fcmp olt float 0x7FF8000000000000, %tmp195
|
||||
%tmp198 = and i1 %tmp196, %tmp197
|
||||
%tmp199 = fcmp olt float 0x7FF8000000000000, %.temp292.9
|
||||
%tmp200 = and i1 %tmp198, %tmp199
|
||||
%temp292.11 = select i1 %tmp200, float poison, float %.temp292.9
|
||||
%temp292.11 = select i1 %tmp200, float 0x7FF8000000000000, float %.temp292.9
|
||||
%tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp0 = icmp eq i32 %tid0, 0
|
||||
br i1 %cmp0, label %IF2565, label %ELSE2566
|
||||
@ -238,7 +240,17 @@ ENDIF: ; preds = %LOOP
|
||||
IF2565: ; preds = %ENDIF
|
||||
%tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp1 = icmp eq i32 %tid1, 0
|
||||
br i1 %cmp1, label %ENDIF2582, label %ELSE2584
|
||||
%tmp212 = fadd float %tmp1, 0x7FF8000000000000
|
||||
%tmp213 = fadd float 0.000000e+00, %tmp212
|
||||
%floor = call float @llvm.floor.f32(float %tmp213)
|
||||
%tmp214 = fsub float %tmp213, %floor
|
||||
%tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp4 = icmp eq i32 %tid4, 0
|
||||
%tmp215 = fsub float 1.000000e+00, %tmp214
|
||||
%tmp216 = call float @llvm.sqrt.f32(float %tmp215)
|
||||
%tmp217 = fmul float %tmp216, 0x7FF8000000000000
|
||||
%tmp218 = fadd float %tmp217, 0x7FF8000000000000
|
||||
br label %ENDIF2564
|
||||
|
||||
ELSE2566: ; preds = %ENDIF
|
||||
%tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
@ -246,14 +258,14 @@ ELSE2566: ; preds = %ENDIF
|
||||
%tmp201 = fcmp oeq float %temp292.11, %tidf
|
||||
br i1 %tmp201, label %ENDLOOP, label %ELSE2593
|
||||
|
||||
ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
|
||||
%temp894.1 = phi float [ poison, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
|
||||
%temp18.1 = phi float [ %tmp218, %ENDIF2588 ], [ poison, %ENDIF2594 ]
|
||||
ENDIF2564: ; preds = %ENDIF2594, %IF2565
|
||||
%temp894.1 = phi float [ poison, %IF2565 ], [ %temp894.2, %ENDIF2594 ]
|
||||
%temp18.1 = phi float [ %tmp218, %IF2565 ], [ poison, %ENDIF2594 ]
|
||||
%tmp202 = fsub float %tmp5, 0x7FF8000000000000
|
||||
%tmp203 = fmul float %tmp202, 0x7FF8000000000000
|
||||
%tmp204 = call float @llvm.maxnum.f32(float poison, float %tmp203)
|
||||
%tmp205 = call float @llvm.minnum.f32(float %tmp204, float poison)
|
||||
%tmp206 = call float @llvm.minnum.f32(float %tmp205, float poison)
|
||||
%tmp204 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp203)
|
||||
%tmp205 = call float @llvm.minnum.f32(float %tmp204, float 0x7FF8000000000000)
|
||||
%tmp206 = call float @llvm.minnum.f32(float %tmp205, float 0x7FF8000000000000)
|
||||
%tmp207 = fcmp ogt float 0x7FF8000000000000, 0.000000e+00
|
||||
%tmp208 = fcmp olt float 0x7FF8000000000000, 1.000000e+00
|
||||
%tmp209 = and i1 %tmp207, %tmp208
|
||||
@ -263,31 +275,6 @@ ENDIF2564: ; preds = %ENDIF2594, %ENDIF25
|
||||
%tmp211 = and i1 %tmp209, %tmp210
|
||||
br i1 %tmp211, label %ENDIF2795, label %ELSE2797
|
||||
|
||||
ELSE2584: ; preds = %IF2565
|
||||
br label %ENDIF2582
|
||||
|
||||
ENDIF2582: ; preds = %ELSE2584, %IF2565
|
||||
%tmp212 = fadd float %tmp1, 0x7FF8000000000000
|
||||
%tmp213 = fadd float 0.000000e+00, %tmp212
|
||||
%floor = call float @llvm.floor.f32(float %tmp213)
|
||||
%tmp214 = fsub float %tmp213, %floor
|
||||
%tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp4 = icmp eq i32 %tid4, 0
|
||||
br i1 %cmp4, label %IF2589, label %ELSE2590
|
||||
|
||||
IF2589: ; preds = %ENDIF2582
|
||||
br label %ENDIF2588
|
||||
|
||||
ELSE2590: ; preds = %ENDIF2582
|
||||
br label %ENDIF2588
|
||||
|
||||
ENDIF2588: ; preds = %ELSE2590, %IF2589
|
||||
%tmp215 = fsub float 1.000000e+00, %tmp214
|
||||
%tmp216 = call float @llvm.sqrt.f32(float %tmp215)
|
||||
%tmp217 = fmul float %tmp216, 0x7FF8000000000000
|
||||
%tmp218 = fadd float %tmp217, 0x7FF8000000000000
|
||||
br label %ENDIF2564
|
||||
|
||||
ELSE2593: ; preds = %ELSE2566
|
||||
%tmp219 = fcmp oeq float %temp292.11, %tmp81
|
||||
%tmp220 = fcmp olt float %tmp81, %tmp83
|
||||
@ -298,24 +285,20 @@ ELSE2596: ; preds = %ELSE2593
|
||||
%tmp222 = fcmp oeq float %temp292.11, %tmp100
|
||||
%tmp223 = fcmp olt float %tmp100, %tmp102
|
||||
%tmp224 = and i1 %tmp222, %tmp223
|
||||
br i1 %tmp224, label %ENDIF2594, label %ELSE2632
|
||||
%undef_ELSE2596 = freeze i1 poison
|
||||
%brmerge = or i1 %tmp224, %undef_ELSE2596
|
||||
br i1 %brmerge, label %ENDIF2594, label %ELSE2650
|
||||
|
||||
ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
|
||||
%temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
|
||||
ENDIF2594: ; preds = %ELSE2704, %ELSE2650, %ELSE2596, %ELSE2686, %ELSE2668, %ELSE2593
|
||||
%temp894.2 = phi float [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ], [ 0.000000e+00, %ELSE2650 ], [ %spec.select6, %ELSE2704 ]
|
||||
%tmp225 = fmul float %temp894.2, 0x7FF8000000000000
|
||||
br label %ENDIF2564
|
||||
|
||||
ELSE2632: ; preds = %ELSE2596
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2650
|
||||
|
||||
ELSE2650: ; preds = %ELSE2632
|
||||
ELSE2650: ; preds = %ELSE2596
|
||||
%tmp226 = fcmp oeq float %temp292.11, %tmp110
|
||||
%tmp227 = fcmp olt float %tmp110, %tmp111
|
||||
%tmp228 = and i1 %tmp226, %tmp227
|
||||
br i1 %tmp228, label %IF2667, label %ELSE2668
|
||||
|
||||
IF2667: ; preds = %ELSE2650
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2671
|
||||
br i1 %tmp228, label %ENDIF2594, label %ELSE2668
|
||||
|
||||
ELSE2668: ; preds = %ELSE2650
|
||||
%tmp229 = fcmp oeq float %temp292.11, %tmp128
|
||||
@ -323,9 +306,6 @@ ELSE2668: ; preds = %ELSE2650
|
||||
%tmp231 = and i1 %tmp229, %tmp230
|
||||
br i1 %tmp231, label %ENDIF2594, label %ELSE2686
|
||||
|
||||
ELSE2671: ; preds = %IF2667
|
||||
br label %ENDIF2594
|
||||
|
||||
ELSE2686: ; preds = %ELSE2668
|
||||
%tmp232 = fcmp oeq float %temp292.11, %tmp145
|
||||
%tmp233 = fcmp olt float %tmp145, 0x7FF8000000000000
|
||||
@ -336,37 +316,9 @@ ELSE2704: ; preds = %ELSE2686
|
||||
%tmp235 = fcmp oeq float %temp292.11, %tmp180
|
||||
%tmp236 = fcmp olt float %tmp180, 0x7FF8000000000000
|
||||
%tmp237 = and i1 %tmp235, %tmp236
|
||||
br i1 %tmp237, label %ENDIF2594, label %ELSE2740
|
||||
|
||||
ELSE2740: ; preds = %ELSE2704
|
||||
br i1 undef, label %IF2757, label %ELSE2758
|
||||
|
||||
IF2757: ; preds = %ELSE2740
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2761
|
||||
|
||||
ELSE2758: ; preds = %ELSE2740
|
||||
br i1 undef, label %IF2775, label %ENDIF2594
|
||||
|
||||
ELSE2761: ; preds = %IF2757
|
||||
br label %ENDIF2594
|
||||
|
||||
IF2775: ; preds = %ELSE2758
|
||||
%tmp238 = fcmp olt float 0x7FF8000000000000, 0x7FF8000000000000
|
||||
br i1 %tmp238, label %ENDIF2594, label %ELSE2779
|
||||
|
||||
ELSE2779: ; preds = %IF2775
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2782
|
||||
|
||||
ELSE2782: ; preds = %ELSE2779
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2785
|
||||
|
||||
ELSE2785: ; preds = %ELSE2782
|
||||
%tmp239 = fcmp olt float 0x7FF8000000000000, 0.000000e+00
|
||||
br i1 %tmp239, label %ENDIF2594, label %ELSE2788
|
||||
|
||||
ELSE2788: ; preds = %ELSE2785
|
||||
%tmp240 = fcmp olt float 0.000000e+00, 0x7FF8000000000000
|
||||
%.2848 = select i1 %tmp240, float -1.000000e+00, float 1.000000e+00
|
||||
%undef.ELSE2704 = freeze i1 poison
|
||||
%spec.select = select i1 %undef.ELSE2704, float 0.000000e+00, float %temp894.0
|
||||
%spec.select6 = select i1 %tmp237, float 0.000000e+00, float %spec.select
|
||||
br label %ENDIF2594
|
||||
|
||||
ELSE2797: ; preds = %ENDIF2564
|
||||
@ -386,22 +338,19 @@ ELSE2797: ; preds = %ENDIF2564
|
||||
%tmp254 = call float @llvm.minnum.f32(float %tmp245, float %tmp251)
|
||||
%tmp255 = call float @llvm.maxnum.f32(float %tmp246, float %tmp252)
|
||||
%tmp256 = call float @llvm.maxnum.f32(float %tmp253, float %tmp254)
|
||||
%tmp257 = call float @llvm.maxnum.f32(float %tmp256, float poison)
|
||||
%tmp258 = call float @llvm.minnum.f32(float poison, float %tmp255)
|
||||
%tmp257 = call float @llvm.maxnum.f32(float %tmp256, float 0x7FF8000000000000)
|
||||
%tmp258 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp255)
|
||||
%tmp259 = fcmp ogt float %tmp257, 0.000000e+00
|
||||
%tmp260 = fcmp olt float %tmp257, 1.000000e+00
|
||||
%tmp261 = and i1 %tmp259, %tmp260
|
||||
%tmp262 = fcmp olt float %tmp257, %tmp258
|
||||
%tmp263 = and i1 %tmp261, %tmp262
|
||||
br i1 %tmp263, label %ENDIF2795, label %ELSE2800
|
||||
br i1 %tmp263, label %ENDIF2795, label %ELSE2803
|
||||
|
||||
ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
|
||||
ENDIF2795: ; preds = %ELSE2806, %ELSE2797, %ELSE2824, %ELSE2821, %ELSE2803, %ENDIF2564
|
||||
br label %LOOP
|
||||
|
||||
ELSE2800: ; preds = %ELSE2797
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2803
|
||||
|
||||
ELSE2803: ; preds = %ELSE2800
|
||||
ELSE2803: ; preds = %ELSE2797
|
||||
%tmp264 = fsub float %tmp20, 0x7FF8000000000000
|
||||
%tmp265 = fsub float %tmp21, 0x7FF8000000000000
|
||||
%tmp266 = fsub float %tmp22, 0x7FF8000000000000
|
||||
@ -417,9 +366,9 @@ ELSE2803: ; preds = %ELSE2800
|
||||
%tmp276 = call float @llvm.minnum.f32(float %tmp267, float %tmp273)
|
||||
%tmp277 = call float @llvm.maxnum.f32(float %tmp268, float %tmp274)
|
||||
%tmp278 = call float @llvm.maxnum.f32(float %tmp269, float %tmp275)
|
||||
%tmp279 = call float @llvm.maxnum.f32(float %tmp276, float poison)
|
||||
%tmp280 = call float @llvm.maxnum.f32(float %tmp279, float poison)
|
||||
%tmp281 = call float @llvm.minnum.f32(float poison, float %tmp277)
|
||||
%tmp279 = call float @llvm.maxnum.f32(float %tmp276, float 0x7FF8000000000000)
|
||||
%tmp280 = call float @llvm.maxnum.f32(float %tmp279, float 0x7FF8000000000000)
|
||||
%tmp281 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp277)
|
||||
%tmp282 = call float @llvm.minnum.f32(float %tmp281, float %tmp278)
|
||||
%tmp283 = fcmp ogt float %tmp280, 0.000000e+00
|
||||
%tmp284 = fcmp olt float %tmp280, 1.000000e+00
|
||||
@ -438,31 +387,19 @@ ELSE2806: ; preds = %ELSE2803
|
||||
%tmp294 = fsub float %tmp29, 0x7FF8000000000000
|
||||
%tmp295 = fmul float %tmp294, 0x7FF8000000000000
|
||||
%tmp296 = call float @llvm.minnum.f32(float %tmp291, float %tmp295)
|
||||
%tmp297 = call float @llvm.minnum.f32(float %tmp292, float poison)
|
||||
%tmp298 = call float @llvm.maxnum.f32(float %tmp293, float poison)
|
||||
%tmp297 = call float @llvm.minnum.f32(float %tmp292, float 0x7FF8000000000000)
|
||||
%tmp298 = call float @llvm.maxnum.f32(float %tmp293, float 0x7FF8000000000000)
|
||||
%tmp299 = call float @llvm.maxnum.f32(float %tmp296, float %tmp297)
|
||||
%tmp300 = call float @llvm.maxnum.f32(float %tmp299, float poison)
|
||||
%tmp301 = call float @llvm.minnum.f32(float poison, float %tmp298)
|
||||
%tmp300 = call float @llvm.maxnum.f32(float %tmp299, float 0x7FF8000000000000)
|
||||
%tmp301 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp298)
|
||||
%tmp302 = fcmp ogt float %tmp300, 0.000000e+00
|
||||
%tmp303 = fcmp olt float %tmp300, 1.000000e+00
|
||||
%tmp304 = and i1 %tmp302, %tmp303
|
||||
%tmp305 = fcmp olt float %tmp300, %tmp301
|
||||
%tmp306 = and i1 %tmp304, %tmp305
|
||||
br i1 %tmp306, label %ENDIF2795, label %ELSE2809
|
||||
br i1 %tmp306, label %ENDIF2795, label %ELSE2821
|
||||
|
||||
ELSE2809: ; preds = %ELSE2806
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2812
|
||||
|
||||
ELSE2812: ; preds = %ELSE2809
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2815
|
||||
|
||||
ELSE2815: ; preds = %ELSE2812
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2818
|
||||
|
||||
ELSE2818: ; preds = %ELSE2815
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2821
|
||||
|
||||
ELSE2821: ; preds = %ELSE2818
|
||||
ELSE2821: ; preds = %ELSE2806
|
||||
%tmp307 = fsub float %tmp56, 0x7FF8000000000000
|
||||
%tmp308 = fsub float %tmp57, 0x7FF8000000000000
|
||||
%tmp309 = fsub float %tmp58, 0x7FF8000000000000
|
||||
@ -488,7 +425,8 @@ ELSE2821: ; preds = %ELSE2818
|
||||
br i1 %tmp328, label %ENDIF2795, label %ELSE2824
|
||||
|
||||
ELSE2824: ; preds = %ELSE2821
|
||||
%.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
|
||||
%undef = freeze i1 poison
|
||||
%.2849 = select i1 %undef, float 0.000000e+00, float 1.000000e+00
|
||||
br label %ENDIF2795
|
||||
}
|
||||
|
||||
|
@ -1134,17 +1134,19 @@ exit:
|
||||
}
|
||||
|
||||
; bug 28550
|
||||
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
|
||||
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x, i32 inreg %y) #0 {
|
||||
; SI-LABEL: phi_use_def_before_kill:
|
||||
; SI: ; %bb.0: ; %bb
|
||||
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
|
||||
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
|
||||
; SI-NEXT: s_mov_b64 s[2:3], exec
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
||||
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB11_6
|
||||
; SI-NEXT: ; %bb.1: ; %bb
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
||||
; SI-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB11_3
|
||||
; SI-NEXT: ; %bb.2: ; %bb8
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
@ -1172,13 +1174,15 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
|
||||
; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
|
||||
; GFX10-WAVE64: ; %bb.0: ; %bb
|
||||
; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
|
||||
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
|
||||
; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
||||
; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
|
||||
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
||||
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
|
||||
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6
|
||||
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
|
||||
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
||||
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3
|
||||
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
|
||||
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
|
||||
@ -1202,13 +1206,15 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
|
||||
; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
|
||||
; GFX10-WAVE32: ; %bb.0: ; %bb
|
||||
; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
|
||||
; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
|
||||
; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
|
||||
; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
|
||||
; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
|
||||
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
||||
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, vcc_lo
|
||||
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6
|
||||
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
|
||||
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
||||
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3
|
||||
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
|
||||
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
|
||||
@ -1232,14 +1238,16 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
|
||||
; GFX11-LABEL: phi_use_def_before_kill:
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
|
||||
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
|
||||
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
||||
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
|
||||
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
||||
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
|
||||
; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
|
||||
; GFX11-NEXT: ; %bb.1: ; %bb
|
||||
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
||||
; GFX11-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX11-NEXT: s_cbranch_scc0 .LBB11_3
|
||||
; GFX11-NEXT: ; %bb.2: ; %bb8
|
||||
; GFX11-NEXT: v_mov_b32_e32 v1, 8
|
||||
@ -1265,7 +1273,8 @@ bb:
|
||||
%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
|
||||
%cmp.tmp2 = fcmp olt float %tmp2, 0.0
|
||||
call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
|
||||
br i1 undef, label %phibb, label %bb8
|
||||
%uniform.cond = icmp eq i32 %y, 0
|
||||
br i1 %uniform.cond, label %phibb, label %bb8
|
||||
|
||||
phibb:
|
||||
%tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
|
||||
|
@ -34,7 +34,7 @@ entry:
|
||||
%conv = add i32 %i6, %i7
|
||||
%conv.frozen = freeze i32 %conv
|
||||
%div = udiv i32 %conv.frozen, 49
|
||||
%add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 undef
|
||||
%add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 0
|
||||
%in.ptr1 = getelementptr inbounds float, ptr addrspace(1) %in, i32 %i5
|
||||
br label %for.cond28.preheader
|
||||
|
||||
@ -471,11 +471,11 @@ for.cond28.preheader: ; preds = %for.cond28.preheade
|
||||
br i1 %exitcond.not, label %for.cond.cleanup26, label %for.cond28.preheader
|
||||
|
||||
for.cond.cleanup26: ; preds = %for.cond28.preheader
|
||||
%mul119 = shl nuw nsw i32 undef, 1
|
||||
%mul119 = shl nuw nsw i32 0, 1
|
||||
%mul120 = mul i32 %div, 200704
|
||||
%mul121 = mul i32 undef, 6272
|
||||
%mul121 = mul i32 0, 6272
|
||||
%add122 = add i32 %mul120, %mul121
|
||||
%mul123 = mul nuw nsw i32 undef, 28
|
||||
%mul123 = mul nuw nsw i32 0, 28
|
||||
%add124 = add i32 %add122, %mul123
|
||||
%add126 = add i32 %add124, %mul119
|
||||
%idx.ext127 = zext i32 %add126 to i64
|
||||
|
@ -87,18 +87,18 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %301:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %356:sgpr_128, undef %357:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %367:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4)
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %351:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
|
||||
@ -116,7 +116,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
|
||||
@ -198,9 +198,9 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
|
||||
; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1
|
||||
; CHECK-NEXT: KILL undef %469:sreg_64
|
||||
; CHECK-NEXT: KILL undef %470:sreg_64
|
||||
; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3
|
||||
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4)
|
||||
@ -211,8 +211,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4)
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
|
||||
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
|
||||
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
|
||||
@ -236,10 +236,10 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
|
||||
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
|
||||
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
|
||||
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
|
||||
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
||||
@ -351,13 +351,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
|
||||
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
|
||||
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
|
||||
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
|
||||
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
|
||||
; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
|
||||
; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vgpr_32, undef %558:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
|
||||
; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
.expVert:
|
||||
%0 = extractelement <31 x i32> %userData, i64 2
|
||||
@ -406,7 +406,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%40 = and i32 %rootDesc58.ii1.i, 65535
|
||||
%41 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
|
||||
%42 = insertelement <4 x i32> %41, i32 %40, i32 1
|
||||
%43 = and i32 undef, 65535
|
||||
%43 = and i32 0, 65535
|
||||
%44 = insertelement <4 x i32> poison, i32 %43, i32 1
|
||||
%45 = load <4 x i32>, ptr addrspace(4) poison, align 16
|
||||
%46 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %45, i32 0, i32 0, i32 0, i32 0)
|
||||
@ -470,7 +470,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%104 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %103, i32 0, i32 0, i32 0, i32 0)
|
||||
%105 = add i32 %104, -34
|
||||
%106 = or i32 %101, %105
|
||||
%107 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
|
||||
%undef = freeze i32 poison
|
||||
%107 = call i32 @llvm.amdgcn.readfirstlane(i32 %undef)
|
||||
%108 = sext i32 %107 to i64
|
||||
%109 = getelementptr i8, ptr addrspace(4) %91, i64 %108
|
||||
%110 = load <4 x i32>, ptr addrspace(4) %109, align 16
|
||||
@ -490,7 +491,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%124 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
|
||||
%125 = add i32 %124, -39
|
||||
%126 = or i32 %123, %125
|
||||
%127 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
|
||||
%undef1 = freeze i32 poison
|
||||
%127 = call i32 @llvm.amdgcn.readfirstlane(i32 %undef1)
|
||||
%128 = sext i32 %127 to i64
|
||||
%129 = getelementptr i8, ptr addrspace(4) %32, i64 %128
|
||||
%130 = load <4 x i32>, ptr addrspace(4) %129, align 16
|
||||
@ -513,7 +515,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
|
||||
%148 = add i32 %147, -53
|
||||
%149 = or i32 %144, %148
|
||||
%150 = sext i32 undef to i64
|
||||
%150 = sext i32 0 to i64
|
||||
%151 = getelementptr i8, ptr addrspace(4) %134, i64 %150
|
||||
%152 = load <4 x i32>, ptr addrspace(4) %151, align 16
|
||||
%153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
|
||||
@ -574,7 +576,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%208 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %207, i32 0)
|
||||
%209 = add i32 %208, -130
|
||||
%210 = or i32 %205, %209
|
||||
%211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
|
||||
%211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 0, i32 0
|
||||
%212 = ptrtoint ptr addrspace(6) %211 to i32
|
||||
%213 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %212, i32 0)
|
||||
%214 = add i32 %213, -178
|
||||
@ -617,7 +619,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%251 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %250, i32 0)
|
||||
%252 = add i32 %251, -249
|
||||
%253 = or i32 %248, %252
|
||||
%254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
|
||||
%254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 0, i32 0
|
||||
%255 = ptrtoint ptr addrspace(6) %254 to i32
|
||||
%256 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %255, i32 0)
|
||||
%257 = add i32 %256, -297
|
||||
@ -661,7 +663,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
%295 = sext i32 %294 to i64
|
||||
%296 = getelementptr i8, ptr addrspace(4) %293, i64 %295
|
||||
%.ii0.i = load i32, ptr addrspace(4) %296, align 8
|
||||
%297 = and i32 undef, 65535
|
||||
%297 = and i32 0, 65535
|
||||
%298 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
|
||||
%299 = insertelement <4 x i32> %298, i32 %297, i32 1
|
||||
%300 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %299, i32 0, i32 0)
|
||||
|
@ -8,6 +8,7 @@
|
||||
define amdgpu_kernel void @func() #0 {
|
||||
; CHECK-LABEL: func:
|
||||
; CHECK: ; %bb.0: ; %B0
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s8, 0
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %B30.1
|
||||
@ -18,17 +19,19 @@ define amdgpu_kernel void @func() #0 {
|
||||
; CHECK-NEXT: ds_write_b32 v0, v0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
B0:
|
||||
br i1 undef, label %B1, label %B2
|
||||
%id = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%cmp = icmp eq i32 %id, 0
|
||||
br i1 %cmp, label %B1, label %B2
|
||||
|
||||
B1:
|
||||
br label %B2
|
||||
|
||||
B2:
|
||||
%v0 = phi <4 x float> [ zeroinitializer, %B1 ], [ <float 0.0, float 0.0, float 0.0, float poison>, %B0 ]
|
||||
br i1 undef, label %B30.1, label %B30.2
|
||||
br i1 %cmp, label %B30.1, label %B30.2
|
||||
|
||||
B30.1:
|
||||
%sub = fsub <4 x float> %v0, undef
|
||||
%sub = fsub <4 x float> %v0, splat (float 0x7FF8000000000000)
|
||||
br label %B30.2
|
||||
|
||||
B30.2:
|
||||
@ -73,7 +76,7 @@ bb:
|
||||
%tmp3 = bitcast i32 %tmp1 to float
|
||||
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
|
||||
%tmp5 = extractelement <4 x float> %tmp4, i32 0
|
||||
%tmp6 = fmul float %tmp5, undef
|
||||
%tmp6 = fmul float %tmp5, 0x7FF8000000000000
|
||||
%tmp7 = fadd float %tmp6, %tmp6
|
||||
%tmp8 = insertelement <4 x i32> %tmp2, i32 %tmp, i32 1
|
||||
store <4 x i32> %tmp8, ptr addrspace(1) poison, align 16
|
||||
|
@ -1150,6 +1150,10 @@ define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
|
||||
; SI-NEXT: v_add_i32_e64 v0, s[4:5], 8, v0
|
||||
; SI-NEXT: .LBB20_2: ; %bb1
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: ;;#ASMSTART
|
||||
; SI-NEXT: ; def s4
|
||||
; SI-NEXT: ;;#ASMEND
|
||||
; SI-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; SI-NEXT: s_cbranch_scc1 .LBB20_1
|
||||
; SI-NEXT: ; %bb.3: ; %bb2
|
||||
; SI-NEXT: ; in Loop: Header=BB20_2 Depth=1
|
||||
@ -1173,6 +1177,10 @@ define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
|
||||
; VI-NEXT: v_add_u32_e64 v0, s[4:5], 8, v0
|
||||
; VI-NEXT: .LBB20_2: ; %bb1
|
||||
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; VI-NEXT: ;;#ASMSTART
|
||||
; VI-NEXT: ; def s4
|
||||
; VI-NEXT: ;;#ASMEND
|
||||
; VI-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; VI-NEXT: s_cbranch_scc1 .LBB20_1
|
||||
; VI-NEXT: ; %bb.3: ; %bb2
|
||||
; VI-NEXT: ; in Loop: Header=BB20_2 Depth=1
|
||||
@ -1189,7 +1197,9 @@ bb1: ; preds = %bb3, %bb0
|
||||
%tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
|
||||
%tmp1 = add nsw i32 %tmp0, -1
|
||||
%tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
|
||||
br i1 undef, label %bb2, label %bb3
|
||||
%cond = call i32 asm "; def $0","=s"()
|
||||
%cmp = icmp eq i32 %cond, 0
|
||||
br i1 %cmp, label %bb2, label %bb3
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
store volatile i32 1, ptr addrspace(3) %tmp2, align 4
|
||||
|
@ -538,7 +538,8 @@ if.then: ; preds = %entry
|
||||
ret void
|
||||
|
||||
if.then9: ; preds = %entry
|
||||
br i1 undef, label %sw.bb18, label %sw.bb
|
||||
%undef = freeze i1 poison
|
||||
br i1 %undef, label %sw.bb18, label %sw.bb
|
||||
|
||||
sw.bb: ; preds = %if.then9
|
||||
%i17 = load i8, ptr addrspace(1) null, align 1
|
||||
|
@ -1511,7 +1511,7 @@ define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %a
|
||||
; GFX1064-NEXT: s_endpgm
|
||||
bb0:
|
||||
%tmp = icmp sgt i32 %arg1, 4
|
||||
%undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef)
|
||||
%undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 0)
|
||||
%tmp4 = select i1 %undef, float %arg, float 1.000000e+00
|
||||
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
|
||||
%tmp6 = fcmp olt float %arg2, 1.000000e+00
|
||||
@ -2329,7 +2329,7 @@ for.body.lr.ph: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
br i1 undef, label %for.end, label %for.body
|
||||
br i1 poison, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
|
@ -16,7 +16,7 @@ define amdgpu_cs void @shader(i32 %arg0, i32 %arg1, <8 x i32> inreg %arg2, ptr a
|
||||
%bload1.f = bitcast i32 %bload1 to float
|
||||
%bload2.f = bitcast i32 %bload2 to float
|
||||
%bload3.f = bitcast i32 %bload3 to float
|
||||
%istore0 = insertelement <4 x float> undef, float %bload0.f, i32 0
|
||||
%istore0 = insertelement <4 x float> poison, float %bload0.f, i32 0
|
||||
%istore1 = insertelement <4 x float> %istore0, float %bload0.f, i32 1
|
||||
%istore2 = insertelement <4 x float> %istore1, float %bload0.f, i32 2
|
||||
%istore3 = insertelement <4 x float> %istore2, float %bload0.f, i32 3
|
||||
|
@ -59,7 +59,7 @@
|
||||
br i1 %0, label %bb2, label %bb4, !dbg !12, !amdgpu.uniform !7
|
||||
|
||||
bb2: ; preds = %Flow
|
||||
store volatile i32 17, ptr addrspace(1) undef, align 4, !dbg !13
|
||||
store volatile i32 17, ptr addrspace(1) poison, align 4, !dbg !13
|
||||
br label %bb4, !dbg !14, !amdgpu.uniform !7
|
||||
|
||||
bb3: ; preds = %bb0
|
||||
|
@ -51,7 +51,7 @@ bb0:
|
||||
br i1 %tmp, label %bb2, label %bb3
|
||||
|
||||
bb2:
|
||||
store volatile i32 17, ptr addrspace(1) undef
|
||||
store volatile i32 17, ptr addrspace(1) poison
|
||||
br label %bb4
|
||||
|
||||
bb3:
|
||||
|
@ -33,11 +33,11 @@ body: |
|
||||
; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM
|
||||
|
||||
%0 = COPY $sgpr4_sgpr5
|
||||
%1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
|
||||
%2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
|
||||
%3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load (s64) from `ptr addrspace(4) undef`)
|
||||
%4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(4) undef`)
|
||||
%5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(2) undef`)
|
||||
%6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(1) undef`)
|
||||
%1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
||||
%2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
||||
%3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load (s64) from `ptr addrspace(4) poison`)
|
||||
%4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(4) poison`)
|
||||
%5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(2) poison`)
|
||||
%6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(1) poison`)
|
||||
|
||||
...
|
||||
|
@ -74,14 +74,14 @@ body: |
|
||||
liveins: $sgpr4_sgpr5
|
||||
|
||||
S_WAITCNT 0
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
|
||||
$sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) undef`)
|
||||
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
|
||||
$sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) undef`)
|
||||
$sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) undef`)
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
||||
$sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
|
||||
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
||||
$sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
|
||||
$sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
|
||||
S_WAITCNT 127
|
||||
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
|
||||
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
|
||||
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
|
||||
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
|
||||
FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst (s32) into %ir.agent_out)
|
||||
|
Loading…
x
Reference in New Issue
Block a user