Alex Voicu ad435bcc14
[clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (#102776)
The AMDGPU kernel ABI is not directly representable in SPIR-V, since it
relies on passing aggregates `byref`, and SPIR-V only encodes `byval`
(which the AMDGPU BE disallows for kernel arguments). As a temporary
solution to this mismatch, we add special handling for AMDGCN flavoured
SPIR-V, whereby aggregates are passed as direct, both to kernels and to
normal functions. This is not ideal (there are pathological cases where
performance is heavily impacted), but empirically robust and guaranteed
to work as the AMDGPU BE retains handling of `direct` passing for legacy
reasons.

We will revisit this in the future, but as it stands it is enough to
pass a wide array of integration tests and generates correct SPIR-V and
correct reverse translation into LLVM IR. The
amdgpu-kernel-arg-pointer-type test is updated via the automated script,
and thus becomes quite noisy.
2024-08-21 13:16:59 +01:00

47 lines
2.1 KiB
Plaintext

// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
// RUN: %clang_cc1 -x hip -triple spirv64-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCNSPIRV %s
// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
#include "Inputs/cuda.h"
struct A {
int a[32];
float *p;
};
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x)
__global__ void kernel(A x) {
}
class Kernel {
public:
// AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
// AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x)
static __global__ void memberKernel(A x){}
template<typename T> static __global__ void templateMemberKernel(T x) {}
};
template <typename T>
__global__ void templateKernel(T x) {}
void launch(void*);
void test() {
Kernel K;
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}
// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)templateKernel<A>);
// AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}
// AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %{{.+}}
// NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)Kernel::templateMemberKernel<A>);
}