0
0
mirror of https://github.com/llvm/llvm-project.git synced 2025-04-21 11:46:49 +00:00

[Offload] Remove handling for COV4 binaries from offload/ ()

Summary:
We moved from cov4 to cov5 a long time ago, and it guards simplifying
some front end code, so we should be able to move up with this.
This commit is contained in:
Joseph Huber 2025-03-24 18:58:20 -05:00 committed by GitHub
parent ff8aa300d6
commit 25bf4e262c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 15 additions and 27 deletions
offload/plugins-nextgen
amdgpu
common/src/Utils

@ -576,8 +576,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// Get the HSA kernel object representing the kernel function.
uint64_t getKernelObject() const { return KernelObject; }
/// Get the size of implicitargs based on the code object version
/// @return 56 for cov4 and 256 for cov5
/// Get the size of implicitargs based on the code object version.
uint32_t getImplicitArgsSize() const { return ImplicitArgsSize; }
/// Indicates whether or not we need to set up our own private segment size.
@ -3386,20 +3385,17 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
return Err;
// Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
if (ImplArgs &&
getImplicitArgsSize() == sizeof(hsa_utils::AMDGPUImplicitArgsTy)) {
ImplArgs->BlockCountX = NumBlocks[0];
ImplArgs->BlockCountY = NumBlocks[1];
ImplArgs->BlockCountZ = NumBlocks[2];
ImplArgs->GroupSizeX = NumThreads[0];
ImplArgs->GroupSizeY = NumThreads[1];
ImplArgs->GroupSizeZ = NumThreads[2];
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
? 3
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
}
// Set the COV5+ implicit arguments to the appropriate values.
ImplArgs->BlockCountX = NumBlocks[0];
ImplArgs->BlockCountY = NumBlocks[1];
ImplArgs->BlockCountZ = NumBlocks[2];
ImplArgs->GroupSizeX = NumThreads[0];
ImplArgs->GroupSizeY = NumThreads[1];
ImplArgs->GroupSizeZ = NumThreads[2];
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
? 3
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,

@ -40,17 +40,10 @@ struct AMDGPUImplicitArgsTy {
uint8_t Unused2[132]; // 132 byte offset.
};
// Dummy struct for COV4 implicitargs.
struct AMDGPUImplicitArgsTyCOV4 {
uint8_t Unused[56];
};
/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
/// `Version` is the ELF ABI version, e.g. COV5.
inline uint32_t getImplicitArgsSize(uint16_t Version) {
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
? sizeof(AMDGPUImplicitArgsTyCOV4)
: sizeof(AMDGPUImplicitArgsTy);
return sizeof(AMDGPUImplicitArgsTy);
}
/// Reads the AMDGPU specific metadata from the ELF file and propagates the

@ -65,10 +65,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
if (Header.e_machine == EM_AMDGPU) {
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version 4 or above");
return createError("Invalid AMD ABI version, must be version 5 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) >
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)