[Offload] Guard HSA implicit arguments if they aren't created (#133073)

Summary:
We conditionally allocate the implicit arguments, so they may be
null. The flang compiler seems to hit this case, even though it
shouldn't when it's supposed to conform to the HSA code object. For now
guard this to fix the regression and cover a case in the future where
someone rolls a fully custom implementation.

Fixes: https://github.com/llvm/llvm-project/issues/132982
This commit is contained in:
Joseph Huber 2025-03-26 08:54:33 -05:00 committed by GitHub
parent 1b07e865a1
commit 75f810e025
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3363,16 +3363,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
return Err;
hsa_utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
if (ArgsSize == LaunchParams.Size + getImplicitArgsSize()) {
// Initialize implicit arguments.
ImplArgs = reinterpret_cast<hsa_utils::AMDGPUImplicitArgsTy *>(
utils::advancePtr(AllArgs, LaunchParams.Size));
// Initialize the implicit arguments to zero.
std::memset(ImplArgs, 0, getImplicitArgsSize());
}
// Copy the explicit arguments.
// TODO: We should expose the args memory manager alloc to the common part as
// alternative to copying them twice.
@ -3385,17 +3375,24 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
return Err;
// Set the COV5+ implicit arguments to the appropriate values.
ImplArgs->BlockCountX = NumBlocks[0];
ImplArgs->BlockCountY = NumBlocks[1];
ImplArgs->BlockCountZ = NumBlocks[2];
ImplArgs->GroupSizeX = NumThreads[0];
ImplArgs->GroupSizeY = NumThreads[1];
ImplArgs->GroupSizeZ = NumThreads[2];
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
? 3
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
hsa_utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
if (ArgsSize == LaunchParams.Size + getImplicitArgsSize()) {
ImplArgs = reinterpret_cast<hsa_utils::AMDGPUImplicitArgsTy *>(
utils::advancePtr(AllArgs, LaunchParams.Size));
// Set the COV5+ implicit arguments to the appropriate values.
std::memset(ImplArgs, 0, getImplicitArgsSize());
ImplArgs->BlockCountX = NumBlocks[0];
ImplArgs->BlockCountY = NumBlocks[1];
ImplArgs->BlockCountZ = NumBlocks[2];
ImplArgs->GroupSizeX = NumThreads[0];
ImplArgs->GroupSizeY = NumThreads[1];
ImplArgs->GroupSizeZ = NumThreads[2];
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
? 3
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
}
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,