mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-16 11:56:38 +00:00
AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256)
Use i32 instead of i16 for the offset so that a large offset is not interpreted as a negative 16-bit offset. (cherry picked from commit 83fe85115da9dc25fa270d2ea8140113c8d49670)
This commit is contained in:
parent
03d133728a
commit
962edd3f71
@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
|
||||
0);
|
||||
}
|
||||
|
||||
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
|
||||
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
|
||||
return false;
|
||||
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
|
||||
return false;
|
||||
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
|
||||
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
|
||||
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
|
||||
return false;
|
||||
SAddr = SelectSAddrFI(CurDAG, SAddr);
|
||||
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
|
||||
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -4956,7 +4956,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
|
||||
;
|
||||
; GFX12-LABEL: sgpr_base_large_offset:
|
||||
; GFX12: ; %bb.0: ; %entry
|
||||
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
|
||||
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX12-NEXT: s_nop 0
|
||||
@ -5015,7 +5015,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
|
||||
;
|
||||
; GFX12-PAL-LABEL: sgpr_base_large_offset:
|
||||
; GFX12-PAL: ; %bb.0: ; %entry
|
||||
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
|
||||
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512
|
||||
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX12-PAL-NEXT: s_nop 0
|
||||
@ -5068,7 +5068,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
|
||||
; GFX12: ; %bb.0: ; %entry
|
||||
; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000
|
||||
; GFX12-NEXT: s_and_b32 s0, s0, -4
|
||||
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX12-NEXT: s_nop 0
|
||||
@ -5133,7 +5133,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
|
||||
; GFX12-PAL: ; %bb.0: ; %entry
|
||||
; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000
|
||||
; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4
|
||||
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
|
||||
; GFX12-PAL-NEXT: s_nop 0
|
||||
@ -5189,7 +5189,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
|
||||
; GFX12: ; %bb.0: ; %bb
|
||||
; GFX12-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
|
||||
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
@ -5251,7 +5251,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
|
||||
; GFX12-PAL: ; %bb.0: ; %bb
|
||||
; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1
|
||||
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
|
||||
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-PAL-NEXT: s_endpgm
|
||||
bb:
|
||||
|
Loading…
x
Reference in New Issue
Block a user