mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-27 04:16:09 +00:00

`sret` arguments are always going to reside in the stack/`alloca` address space, which makes the current formulation where their AS is derived from the pointee somewhat quaint. This patch ensures that `sret` ends up pointing to the `alloca` AS in IR function signatures, and also guards agains trying to pass a casted `alloca`d pointer to a `sret` arg, which can happen for most languages, when compiled for targets that have a non-zero `alloca` AS (e.g. AMDGCN) / map `LangAS::default` to a non-zero value (SPIR-V). A target could still choose to do something different here, by e.g. overriding `classifyReturnType` behaviour. In a broader sense, this patch extends non-aliased indirect args to also carry an AS, which leads to changing the `getIndirect()` interface. At the moment we're only using this for (indirect) returns, but it allows for future handling of indirect args themselves. We default to using the AllocaAS as that matches what Clang is currently doing, however if, in the future, a target would opt for e.g. placing indirect returns in some other storage, with another AS, this will require revisiting. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
356 lines
22 KiB
Common Lisp
356 lines
22 KiB
Common Lisp
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
|
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s
|
|
|
|
typedef int int2 __attribute__((ext_vector_type(2)));
|
|
|
|
typedef struct {
|
|
int cells[9];
|
|
} Mat3X3;
|
|
|
|
typedef struct {
|
|
int cells[16];
|
|
} Mat4X4;
|
|
|
|
typedef struct {
|
|
int cells[1024];
|
|
} Mat32X32;
|
|
|
|
typedef struct {
|
|
int cells[4096];
|
|
} Mat64X64;
|
|
|
|
struct StructOneMember {
|
|
int2 x;
|
|
};
|
|
|
|
struct StructTwoMember {
|
|
int2 x;
|
|
int2 y;
|
|
};
|
|
|
|
struct LargeStructOneMember {
|
|
int2 x[100];
|
|
};
|
|
|
|
struct LargeStructTwoMember {
|
|
int2 x[40];
|
|
int2 y[20];
|
|
};
|
|
|
|
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
|
|
struct LargeStructOneMember g_s;
|
|
#endif
|
|
|
|
|
|
// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
|
|
// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
|
|
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
|
|
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
|
|
// AMDGCN-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
|
|
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
|
|
// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
|
|
//
|
|
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
|
|
Mat4X4 out;
|
|
return out;
|
|
}
|
|
|
|
// Expect two mem copies: one for the argument "in", and one for
|
|
// the return value.
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
|
|
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
|
|
// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
|
// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
|
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
|
|
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
|
|
// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
|
|
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
|
|
// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
|
|
out[0] = foo(in[1]);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local void @foo_large(
|
|
// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
|
|
// AMDGCN-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
|
|
Mat64X64 out;
|
|
return out;
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
|
|
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
|
|
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
|
|
// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
|
// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
|
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
|
|
// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
|
|
out[0] = foo_large(in[1]);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local void @FuncOneMember(
|
|
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
|
|
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
void FuncOneMember(struct StructOneMember u) {
|
|
u.x = (int2)(0, 0);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
|
|
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
|
|
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
void FuncOneLargeMember(struct LargeStructOneMember u) {
|
|
u.x[0] = (int2)(0, 0);
|
|
}
|
|
|
|
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
|
|
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl(
|
|
// AMDGCN-SAME: ) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
|
|
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
void test_indirect_arg_globl(void) {
|
|
FuncOneLargeMember(g_s);
|
|
}
|
|
#endif
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
|
|
// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
|
|
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void test_indirect_arg_local(void) {
|
|
local struct LargeStructOneMember l_s;
|
|
FuncOneLargeMember(l_s);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
|
|
// AMDGCN-SAME: ) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
|
|
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
void test_indirect_arg_private(void) {
|
|
struct LargeStructOneMember p_s;
|
|
FuncOneLargeMember(p_s);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
|
|
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
|
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
|
|
// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
|
|
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void KernelOneMember(struct StructOneMember u) {
|
|
FuncOneMember(u);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
|
|
// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
|
|
// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
|
|
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
|
|
FuncOneMember(*u);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
|
|
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
|
|
// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
|
|
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
|
|
FuncOneLargeMember(u);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
|
|
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
|
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
|
|
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
|
// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
void FuncTwoMember(struct StructTwoMember u) {
|
|
u.y = (int2)(0, 0);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
|
|
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
|
|
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
|
|
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
|
|
// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
|
|
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
|
|
u.y[0] = (int2)(0, 0);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
|
|
// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
|
|
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
|
|
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
|
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
|
|
// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
|
|
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
|
|
// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
|
// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
|
|
// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void KernelTwoMember(struct StructTwoMember u) {
|
|
FuncTwoMember(u);
|
|
}
|
|
|
|
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
|
|
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
|
|
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
|
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
|
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
|
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
|
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
|
|
// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
|
|
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
|
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
|
|
// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
|
|
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
|
|
// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
|
// AMDGCN-NEXT: ret void
|
|
//
|
|
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
|
|
FuncLargeTwoMember(u);
|
|
}
|
|
//.
|
|
// AMDGCN: [[META4]] = !{i32 1, i32 1}
|
|
// AMDGCN: [[META5]] = !{!"none", !"none"}
|
|
// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
|
|
// AMDGCN: [[META7]] = !{!"", !""}
|
|
// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
|
|
// AMDGCN: [[META9]] = !{}
|
|
// AMDGCN: [[META10]] = !{i32 0}
|
|
// AMDGCN: [[META11]] = !{!"none"}
|
|
// AMDGCN: [[META12]] = !{!"struct StructOneMember"}
|
|
// AMDGCN: [[META13]] = !{!""}
|
|
// AMDGCN: [[META14]] = !{i32 1}
|
|
// AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
|
|
// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
|
|
// AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
|
|
// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}
|
|
//.
|