mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-15 22:16:30 +00:00
[AMDGPU] Set hasSideEffects=0 for SALU pseudos (#134487)
Fixes #128685.

Co-authored-by: Aman Sharma <210100011@iitb.ac.in>
This commit is contained in:
parent
5a41fc28f3
commit
976c37ec95
@ -231,7 +231,7 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
|
||||
let mayStore = 0;
|
||||
}
|
||||
|
||||
let usesCustomInserter = 1 in {
|
||||
let usesCustomInserter = 1, hasSideEffects = 0 in {
|
||||
let WaveSizePredicate = isWave32 in
|
||||
def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
|
||||
(outs SReg_32:$sdst), (ins SSrc_b32:$mask),
|
||||
@ -243,7 +243,7 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
|
||||
(outs SReg_64:$sdst), (ins SSrc_b64:$mask),
|
||||
[(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
|
||||
>;
|
||||
} // End usesCustomInserter = 1
|
||||
} // End usesCustomInserter = 1, hasSideEffects = 0
|
||||
|
||||
let WaveSizePredicate = isWave32 in
|
||||
def : GCNPat <
|
||||
@ -338,21 +338,23 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
|
||||
[(set SReg_64:$sdst, (UniformBinFrag<sub> i64:$src0, i64:$src1))]
|
||||
>;
|
||||
|
||||
def S_ADD_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
|
||||
>;
|
||||
let hasSideEffects = 0 in {
|
||||
def S_ADD_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
|
||||
>;
|
||||
|
||||
def S_SUB_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
|
||||
>;
|
||||
def S_SUB_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
|
||||
>;
|
||||
|
||||
def S_UADDO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
|
||||
>;
|
||||
def S_UADDO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
|
||||
>;
|
||||
|
||||
def S_USUBO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
|
||||
>;
|
||||
def S_USUBO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
|
||||
>;
|
||||
}
|
||||
|
||||
let OtherPredicates = [HasShaderCyclesHiLoRegisters] in
|
||||
def GET_SHADERCYCLESHILO : SPseudoInstSI<
|
||||
|
@ -663,14 +663,16 @@ let SubtargetPredicate = isGFX12Plus in {
|
||||
}
|
||||
|
||||
// The higher 32-bits of the inputs contain the sign extension bits.
|
||||
def S_MUL_I64_I32_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
let hasSideEffects = 0 in {
|
||||
def S_MUL_I64_I32_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
|
||||
// The higher 32-bits of the inputs are zero.
|
||||
def S_MUL_U64_U32_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
// The higher 32-bits of the inputs are zero.
|
||||
def S_MUL_U64_U32_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
}
|
||||
|
||||
} // End SubtargetPredicate = isGFX12Plus
|
||||
|
||||
|
@ -2850,17 +2850,17 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a,
|
||||
; GFX12-NEXT: s_mov_b32 s5, s3
|
||||
; GFX12-NEXT: s_mov_b32 s17, s3
|
||||
; GFX12-NEXT: s_mov_b32 s19, s3
|
||||
; GFX12-NEXT: s_mov_b32 s24, s3
|
||||
; GFX12-NEXT: s_mov_b32 s20, s3
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_mov_b32 s2, s8
|
||||
; GFX12-NEXT: s_mov_b32 s6, s12
|
||||
; GFX12-NEXT: s_mov_b32 s4, s13
|
||||
; GFX12-NEXT: s_mul_u64 s[22:23], s[6:7], s[2:3]
|
||||
; GFX12-NEXT: s_mul_u64 s[20:21], s[4:5], s[2:3]
|
||||
; GFX12-NEXT: s_mul_u64 s[24:25], s[4:5], s[2:3]
|
||||
; GFX12-NEXT: s_mov_b32 s2, s23
|
||||
; GFX12-NEXT: s_mov_b32 s16, s9
|
||||
; GFX12-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13]
|
||||
; GFX12-NEXT: s_add_nc_u64 s[12:13], s[20:21], s[2:3]
|
||||
; GFX12-NEXT: s_add_nc_u64 s[12:13], s[24:25], s[2:3]
|
||||
; GFX12-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17]
|
||||
; GFX12-NEXT: s_mov_b32 s2, s13
|
||||
; GFX12-NEXT: s_mov_b32 s13, s3
|
||||
@ -2871,9 +2871,9 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a,
|
||||
; GFX12-NEXT: s_mov_b32 s23, s3
|
||||
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[18:19]
|
||||
; GFX12-NEXT: s_add_nc_u64 s[8:9], s[10:11], s[8:9]
|
||||
; GFX12-NEXT: s_mov_b32 s25, s6
|
||||
; GFX12-NEXT: s_mov_b32 s21, s6
|
||||
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[4:5], s[2:3]
|
||||
; GFX12-NEXT: s_or_b64 s[6:7], s[22:23], s[24:25]
|
||||
; GFX12-NEXT: s_or_b64 s[6:7], s[22:23], s[20:21]
|
||||
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[8:9]
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
|
||||
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
||||
|
Loading…
x
Reference in New Issue
Block a user