mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-14 17:06:38 +00:00
[Offload] Guard HSA implicit arguments if they aren't created (#133073)
Summary: We conditionally allocate the implicit arguments, so they may be null. The flang compiler seems to hit this case, even though it shouldn't, since it is supposed to conform to the HSA code object. For now, guard this to fix the regression and to cover a future case where someone rolls a fully custom implementation. Fixes: https://github.com/llvm/llvm-project/issues/132982
This commit is contained in:
parent
1b07e865a1
commit
75f810e025
@ -3363,16 +3363,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
|
||||
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
|
||||
return Err;
|
||||
|
||||
hsa_utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
|
||||
if (ArgsSize == LaunchParams.Size + getImplicitArgsSize()) {
|
||||
// Initialize implicit arguments.
|
||||
ImplArgs = reinterpret_cast<hsa_utils::AMDGPUImplicitArgsTy *>(
|
||||
utils::advancePtr(AllArgs, LaunchParams.Size));
|
||||
|
||||
// Initialize the implicit arguments to zero.
|
||||
std::memset(ImplArgs, 0, getImplicitArgsSize());
|
||||
}
|
||||
|
||||
// Copy the explicit arguments.
|
||||
// TODO: We should expose the args memory manager alloc to the common part as
|
||||
// alternative to copying them twice.
|
||||
@ -3385,17 +3375,24 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
|
||||
if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
|
||||
return Err;
|
||||
|
||||
// Set the COV5+ implicit arguments to the appropriate values.
|
||||
ImplArgs->BlockCountX = NumBlocks[0];
|
||||
ImplArgs->BlockCountY = NumBlocks[1];
|
||||
ImplArgs->BlockCountZ = NumBlocks[2];
|
||||
ImplArgs->GroupSizeX = NumThreads[0];
|
||||
ImplArgs->GroupSizeY = NumThreads[1];
|
||||
ImplArgs->GroupSizeZ = NumThreads[2];
|
||||
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
|
||||
? 3
|
||||
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
|
||||
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
|
||||
hsa_utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
|
||||
if (ArgsSize == LaunchParams.Size + getImplicitArgsSize()) {
|
||||
ImplArgs = reinterpret_cast<hsa_utils::AMDGPUImplicitArgsTy *>(
|
||||
utils::advancePtr(AllArgs, LaunchParams.Size));
|
||||
|
||||
// Set the COV5+ implicit arguments to the appropriate values.
|
||||
std::memset(ImplArgs, 0, getImplicitArgsSize());
|
||||
ImplArgs->BlockCountX = NumBlocks[0];
|
||||
ImplArgs->BlockCountY = NumBlocks[1];
|
||||
ImplArgs->BlockCountZ = NumBlocks[2];
|
||||
ImplArgs->GroupSizeX = NumThreads[0];
|
||||
ImplArgs->GroupSizeY = NumThreads[1];
|
||||
ImplArgs->GroupSizeZ = NumThreads[2];
|
||||
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
|
||||
? 3
|
||||
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
|
||||
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
|
||||
}
|
||||
|
||||
// Push the kernel launch into the stream.
|
||||
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
|
||||
|
Loading…
x
Reference in New Issue
Block a user