From 2f41fa387d6734c637d02cbcf985c7b312b1e23b Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 10 Apr 2025 11:31:21 -0500 Subject: [PATCH] [AMDGPU] Fix code object version not being set to 'none' (#135036) Summary: Previously, we removed the special handling for the code object version global. I erroneously thought that this meant we cold get rid of this weird `-Xclang` option. However, this also emits an LLVM IR module flag, which will then cause linking issues. --- compiler-rt/cmake/builtin-config-ix.cmake | 1 + compiler-rt/lib/builtins/CMakeLists.txt | 6 ++++++ .../modules/LLVMLibCCompileOptionRules.cmake | 2 ++ libcxx/cmake/caches/AMDGPU.cmake | 6 ++++-- offload/DeviceRTL/CMakeLists.txt | 2 +- offload/DeviceRTL/src/Mapping.cpp | 8 ++++++++ offload/test/api/amdgpu_code_object.c | 16 ++++++++++++++++ 7 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 offload/test/api/amdgpu_code_object.c diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index e1945ba2b223..7bd3269bd999 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -22,6 +22,7 @@ builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG) builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG) builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG) +builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG) builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG) builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG) builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 5d78b5a78042..3cdbf21ed403 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -833,6 +833,12 @@ else () append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS) append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG -fconvergent-functions BUILTIN_CFLAGS) + + # AMDGPU targets want to use a generic ABI. + if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn") + append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG + "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS) + endif() endif() set(BUILTIN_DEFS "") diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index ddd18ef293c8..0facb0b9be0c 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -215,6 +215,8 @@ function(_get_common_compile_options output_var flags) if(LIBC_CUDA_ROOT) list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}") endif() + elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) + list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") endif() endif() set(${output_var} ${compile_options} PARENT_SCOPE) diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake index d4aa28b4134e..e7bf3f53891f 100644 --- a/libcxx/cmake/caches/AMDGPU.cmake +++ b/libcxx/cmake/caches/AMDGPU.cmake @@ -32,6 +32,8 @@ set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "") set(LIBCXX_TEST_PARAMS "optimization=none;long_tests=False;executor=amdhsa-loader" CACHE STRING "") # Necessary compile flags for AMDGPU. -set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "") -set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "") +set(LIBCXX_ADDITIONAL_COMPILE_FLAGS + "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "") +set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS + "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "") set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "") diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 07888217b6c6..8f2a1fd01fab 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -255,7 +255,7 @@ function(compileDeviceRTLLibrary target_name target_triple) endfunction() add_custom_target(omptarget.devicertl.amdgpu) -compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa) +compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none) add_custom_target(omptarget.devicertl.nvptx) compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63) diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp index a9e027727b04..e951556c2ad4 100644 --- a/offload/DeviceRTL/src/Mapping.cpp +++ b/offload/DeviceRTL/src/Mapping.cpp @@ -20,6 +20,14 @@ using namespace ompx; +// FIXME: This resolves the handling for the AMDGPU workgroup size when the ABI +// is set to 'none'. We only support COV5+ but this can be removed when COV4 is +// fully deprecated. +#ifdef __AMDGPU__ +extern const inline uint32_t __oclc_ABI_version = 500; +[[gnu::alias("__oclc_ABI_version")]] const uint32_t __oclc_ABI_version__; +#endif + static bool isInLastWarp() { uint32_t MainTId = (mapping::getNumberOfThreadsInBlock() - 1) & ~(mapping::getWarpSize() - 1); diff --git a/offload/test/api/amdgpu_code_object.c b/offload/test/api/amdgpu_code_object.c new file mode 100644 index 000000000000..95d14f6772e7 --- /dev/null +++ b/offload/test/api/amdgpu_code_object.c @@ -0,0 +1,16 @@ +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -Xclang \ +// RUN: -mcode-object-version=5 +// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa + +// REQUIRES: amdgcn-amd-amdhsa + +#include + +// Test to make sure we can build and run with the previous COV. +int main() { +#pragma omp target + ; + + // CHECK: PASS + printf("PASS\n"); +}