[Offload] Remove handling for COV4 binaries from offload/ (#131033)

Summary: We moved from COV4 to COV5 a long time ago, and removing the COV4 handling is a prerequisite for simplifying some front-end code, so it should be safe to drop COV4 support now.
2025-04-21 11:46:49 +00:00 · 2025-03-24 18:58:20 -05:00 · 2025-03-24 18:58:20 -05:00 · 25bf4e262c
commit 25bf4e262c
parent ff8aa300d6
3 changed files with 15 additions and 27 deletions
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@ -576,8 +576,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
  /// Get the HSA kernel object representing the kernel function.
  uint64_t getKernelObject() const { return KernelObject; }

-  /// Get the size of implicitargs based on the code object version
-  /// @return 56 for cov4 and 256 for cov5
+  /// Get the size of implicitargs based on the code object version.
  uint32_t getImplicitArgsSize() const { return ImplicitArgsSize; }

  /// Indicates whether or not we need to set up our own private segment size.
@ -3386,20 +3385,17 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
  if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
    return Err;

-  // Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
-  if (ImplArgs &&
-      getImplicitArgsSize() == sizeof(hsa_utils::AMDGPUImplicitArgsTy)) {
-    ImplArgs->BlockCountX = NumBlocks[0];
-    ImplArgs->BlockCountY = NumBlocks[1];
-    ImplArgs->BlockCountZ = NumBlocks[2];
-    ImplArgs->GroupSizeX = NumThreads[0];
-    ImplArgs->GroupSizeY = NumThreads[1];
-    ImplArgs->GroupSizeZ = NumThreads[2];
-    ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
-                             ? 3
-                             : 1 + (NumBlocks[1] * NumThreads[1] != 1);
-    ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
-  }
+  // Set the COV5+ implicit arguments to the appropriate values.
+  ImplArgs->BlockCountX = NumBlocks[0];
+  ImplArgs->BlockCountY = NumBlocks[1];
+  ImplArgs->BlockCountZ = NumBlocks[2];
+  ImplArgs->GroupSizeX = NumThreads[0];
+  ImplArgs->GroupSizeY = NumThreads[1];
+  ImplArgs->GroupSizeZ = NumThreads[2];
+  ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
+                           ? 3
+                           : 1 + (NumBlocks[1] * NumThreads[1] != 1);
+  ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;

  // Push the kernel launch into the stream.
  return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@ -40,17 +40,10 @@ struct AMDGPUImplicitArgsTy {
  uint8_t Unused2[132]; // 132 byte offset.
 };

-// Dummy struct for COV4 implicitargs.
-struct AMDGPUImplicitArgsTyCOV4 {
-  uint8_t Unused[56];
-};
-
 /// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
 /// `Version` is the ELF ABI version, e.g. COV5.
 inline uint32_t getImplicitArgsSize(uint16_t Version) {
-  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
-             ? sizeof(AMDGPUImplicitArgsTyCOV4)
-             : sizeof(AMDGPUImplicitArgsTy);
+  return sizeof(AMDGPUImplicitArgsTy);
 }

 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@ -65,10 +65,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
  if (Header.e_machine == EM_AMDGPU) {
    if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
      return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
-    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
-        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
+    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
-      return createError("Invalid AMD ABI version, must be version 4 or above");
+      return createError("Invalid AMD ABI version, must be version 5 or above");
    if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
        (Header.e_flags & EF_AMDGPU_MACH) >
            EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)