mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-27 13:26:08 +00:00

The AMDGPU kernel ABI is not directly representable in SPIR-V, since it relies on passing aggregates `byref`, whereas SPIR-V only encodes `byval` (which the AMDGPU BE disallows for kernel arguments). As a temporary solution to this mismatch, we add special handling for AMDGCN-flavoured SPIR-V, whereby aggregates are passed as `direct`, both to kernels and to normal functions. This is not ideal (there are pathological cases where performance is heavily impacted), but it is empirically robust and guaranteed to work, as the AMDGPU BE retains handling of `direct` passing for legacy reasons. We will revisit this in the future, but as it stands it is enough to pass a wide array of integration tests, and it generates correct SPIR-V and correct reverse translation into LLVM IR. The amdgpu-kernel-arg-pointer-type test is updated via the automated script, and thus becomes quite noisy.
47 lines
2.1 KiB
Plaintext
47 lines
2.1 KiB
Plaintext
// Verifies how a by-value aggregate kernel argument is lowered to LLVM IR for
// three device triples (amdgcn-amd-amdhsa, spirv64-amd-amdhsa and
// nvptx64-nvidia-cuda-). Each invocation below compiles this file in device
// mode and pipes the IR to FileCheck under its own check prefix.
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
// RUN: %clang_cc1 -x hip -triple spirv64-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCNSPIRV %s
// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
#include "Inputs/cuda.h"

// Aggregate passed by value to every kernel in this file: a 32-element int
// array followed by a float pointer. The check directives elsewhere in the
// file refer to it as %struct.A.
struct A {
  int a[32];
  float *p;
};

// Free-function kernel taking A by value. The three directives below pin the
// expected definition for each target: a byref pointer in addrspace(4) for
// the amdgcn triple, a direct %struct.A value for the spirv64 triple, and a
// byval pointer for the nvptx64 triple.
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x)
__global__ void kernel(A x) {
}

// Holds the member-function kernel variants: a plain static kernel and a
// static kernel template. The template's instantiation for A is referenced
// from test(), where its check directives are located.
class Kernel {
public:
  // Expected definition of the static member kernel for each target.
  // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
  // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %{{.+}})
  // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x)
  static __global__ void memberKernel(A x){}
  template<typename T> static __global__ void templateMemberKernel(T x) {}
};

// Free-function kernel template. test() takes the address of its A
// instantiation; the check directives for that instantiation sit next to
// that use.
template <typename T>
__global__ void templateKernel(T x) {}

// Declared here without a definition; test() passes kernel addresses to it.
void launch(void*);

// Takes the address of each kernel template's A instantiation and hands it to
// launch(). The directives next to each call pin the expected definition of
// that instantiation for every target.
void test() {
  Kernel K;
  // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
  // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(%struct.A %{{.+}})
  // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x)
  launch((void*)templateKernel<A>);

  // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
  // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %{{.+}})
  // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x)
  launch((void*)Kernel::templateMemberKernel<A>);
}