From 976c37ec950a93ed60068389ad05454f76da1e55 Mon Sep 17 00:00:00 2001 From: amansharma612 Date: Fri, 11 Apr 2025 21:05:01 +0530 Subject: [PATCH] [AMDGPU] Set hasSideEffects=0 for SALU pseudos (#134487) Fixes #128685 --------- Co-authored-by: Aman Sharma <210100011@iitb.ac.in> --- llvm/lib/Target/AMDGPU/SIInstructions.td | 30 ++++++++++++----------- llvm/lib/Target/AMDGPU/SOPInstructions.td | 16 ++++++------ llvm/test/CodeGen/AMDGPU/mul.ll | 10 ++++---- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 9e3011b05de6..ed45cf885114 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -231,7 +231,7 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> { let mayStore = 0; } -let usesCustomInserter = 1 in { +let usesCustomInserter = 1, hasSideEffects = 0 in { let WaveSizePredicate = isWave32 in def S_INVERSE_BALLOT_U32 : SPseudoInstSI< (outs SReg_32:$sdst), (ins SSrc_b32:$mask), @@ -243,7 +243,7 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI< (outs SReg_64:$sdst), (ins SSrc_b64:$mask), [(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))] >; -} // End usesCustomInserter = 1 +} // End usesCustomInserter = 1, hasSideEffects = 0 let WaveSizePredicate = isWave32 in def : GCNPat < @@ -338,21 +338,23 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI < [(set SReg_64:$sdst, (UniformBinFrag i64:$src0, i64:$src1))] >; -def S_ADD_CO_PSEUDO : SPseudoInstSI < - (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in) ->; +let hasSideEffects = 0 in { + def S_ADD_CO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in) + >; -def S_SUB_CO_PSEUDO : SPseudoInstSI < - (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in) ->; + def S_SUB_CO_PSEUDO : SPseudoInstSI < + (outs 
SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in) + >; -def S_UADDO_PSEUDO : SPseudoInstSI < - (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) ->; + def S_UADDO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) + >; -def S_USUBO_PSEUDO : SPseudoInstSI < - (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) ->; + def S_USUBO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) + >; +} let OtherPredicates = [HasShaderCyclesHiLoRegisters] in def GET_SHADERCYCLESHILO : SPseudoInstSI< diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 73f4655f735a..3d3f1ba3f517 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -663,14 +663,16 @@ let SubtargetPredicate = isGFX12Plus in { } // The higher 32-bits of the inputs contain the sign extension bits. - def S_MUL_I64_I32_PSEUDO : SPseudoInstSI < - (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) - >; + let hasSideEffects = 0 in { + def S_MUL_I64_I32_PSEUDO : SPseudoInstSI < + (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) + >; - // The higher 32-bits of the inputs are zero. - def S_MUL_U64_U32_PSEUDO : SPseudoInstSI < - (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) - >; + // The higher 32-bits of the inputs are zero. 
+ def S_MUL_U64_U32_PSEUDO : SPseudoInstSI < + (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) + >; + } } // End SubtargetPredicate = isGFX12Plus diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index 7dd45181a835..896f48a9215b 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -2850,17 +2850,17 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a, ; GFX12-NEXT: s_mov_b32 s5, s3 ; GFX12-NEXT: s_mov_b32 s17, s3 ; GFX12-NEXT: s_mov_b32 s19, s3 -; GFX12-NEXT: s_mov_b32 s24, s3 +; GFX12-NEXT: s_mov_b32 s20, s3 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s2, s8 ; GFX12-NEXT: s_mov_b32 s6, s12 ; GFX12-NEXT: s_mov_b32 s4, s13 ; GFX12-NEXT: s_mul_u64 s[22:23], s[6:7], s[2:3] -; GFX12-NEXT: s_mul_u64 s[20:21], s[4:5], s[2:3] +; GFX12-NEXT: s_mul_u64 s[24:25], s[4:5], s[2:3] ; GFX12-NEXT: s_mov_b32 s2, s23 ; GFX12-NEXT: s_mov_b32 s16, s9 ; GFX12-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13] -; GFX12-NEXT: s_add_nc_u64 s[12:13], s[20:21], s[2:3] +; GFX12-NEXT: s_add_nc_u64 s[12:13], s[24:25], s[2:3] ; GFX12-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17] ; GFX12-NEXT: s_mov_b32 s2, s13 ; GFX12-NEXT: s_mov_b32 s13, s3 @@ -2871,9 +2871,9 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a, ; GFX12-NEXT: s_mov_b32 s23, s3 ; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[18:19] ; GFX12-NEXT: s_add_nc_u64 s[8:9], s[10:11], s[8:9] -; GFX12-NEXT: s_mov_b32 s25, s6 +; GFX12-NEXT: s_mov_b32 s21, s6 ; GFX12-NEXT: s_add_nc_u64 s[2:3], s[4:5], s[2:3] -; GFX12-NEXT: s_or_b64 s[6:7], s[22:23], s[24:25] +; GFX12-NEXT: s_or_b64 s[6:7], s[22:23], s[20:21] ; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[8:9] ; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3