llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
Alex Voicu 6e0b0038cd
[clang][OpenCL][CodeGen][AMDGPU] Do not use private as the default AS for when generic is available (#112442)
Currently, for AMDGPU, when compiling for OpenCL, we unconditionally use
`private` as the default address space. This is wrong for cases where
the `generic` address space is available, and is corrected via this
patch. In general, this AS map abuse is a bad hack and we should re-work
it altogether, but at least after this patch we will stop being
incorrect for e.g. OpenCL 2.0.
2024-10-22 12:05:48 +01:00

1311 lines
98 KiB
Common Lisp

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=X86 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -check-prefixes=SPIR %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -check-prefixes=AMDGCN30-GVAR %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN30 %s
typedef int int2 __attribute__((ext_vector_type(2)));
typedef struct {
int cells[9];
} Mat3X3;
typedef struct {
int cells[16];
} Mat4X4;
typedef struct {
int cells[1024];
} Mat32X32;
typedef struct {
int cells[4096];
} Mat64X64;
struct StructOneMember {
int2 x;
};
struct StructTwoMember {
int2 x;
int2 y;
};
struct LargeStructOneMember {
int2 x[100];
};
struct LargeStructTwoMember {
int2 x[40];
int2 y[20];
};
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
struct LargeStructOneMember g_s;
#endif
//
// X86-LABEL: define void @foo(
// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
// SPIR-LABEL: define dso_local spir_func void @foo(
// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
// AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
// AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
Mat4X4 out;
return out;
}
//
// X86-LABEL: define spir_kernel void @ker(
// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0
// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1
// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker(
// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @ker(
// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0
// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1
// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
// SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker(
// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
// AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker(
// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
// AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN30-NEXT: ret void
//
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
out[0] = foo(in[1]);
}
//
// X86-LABEL: define void @foo_large(
// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local void @foo_large(
// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @foo_large(
// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @foo_large(
// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large(
// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local void @foo_large(
// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN30-NEXT: ret void
//
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
Mat64X64 out;
return out;
}
//
// X86-LABEL: define spir_kernel void @ker_large(
// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0
// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large(
// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @ker_large(
// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0
// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
// SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large(
// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
// AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large(
// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
// AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
// AMDGCN30-NEXT: ret void
//
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
out[0] = foo_large(in[1]);
}
//
// X86-LABEL: define void @FuncOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local void @FuncOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @FuncOneMember(
// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @FuncOneMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember(
// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local void @FuncOneMember(
// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
// AMDGCN30-NEXT: ret void
//
void FuncOneMember(struct StructOneMember u) {
u.x = (int2)(0, 0);
}
//
// X86-LABEL: define void @FuncOneLargeMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember(
// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember(
// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember(
// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN30-NEXT: ret void
//
void FuncOneLargeMember(struct LargeStructOneMember u) {
u.x[0] = (int2)(0, 0);
}
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl(
// AMDGCN20-SAME: ) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_globl(
// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
void test_indirect_arg_globl(void) {
FuncOneLargeMember(g_s);
}
#endif
//
// X86-LABEL: define spir_kernel void @test_indirect_arg_local(
// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4
// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local(
// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
// SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
// AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
kernel void test_indirect_arg_local(void) {
local struct LargeStructOneMember l_s;
FuncOneLargeMember(l_s);
}
//
// X86-LABEL: define void @test_indirect_arg_private(
// X86-SAME: ) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private(
// AMDGCN20-SAME: ) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private(
// SPIR-SAME: ) #[[ATTR0]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private(
// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private(
// AMDGCN30-SAME: ) #[[ATTR0]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
void test_indirect_arg_private(void) {
struct LargeStructOneMember p_s;
FuncOneLargeMember(p_s);
}
//
// X86-LABEL: define spir_kernel void @KernelOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
kernel void KernelOneMember(struct StructOneMember u) {
FuncOneMember(u);
}
//
// X86-LABEL: define spir_kernel void @KernelOneMemberSpir(
// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4
// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir(
// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8
// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
// AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
FuncOneMember(*u);
}
//
// X86-LABEL: define spir_kernel void @KernelLargeOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
FuncOneLargeMember(u);
}
//
// X86-LABEL: define void @FuncTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @FuncTwoMember(
// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember(
// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local void @FuncTwoMember(
// AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
// AMDGCN30-NEXT: ret void
//
void FuncTwoMember(struct StructTwoMember u) {
u.y = (int2)(0, 0);
}
//
// X86-LABEL: define void @FuncLargeTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember(
// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember(
// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember(
// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN30-NEXT: ret void
//
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
u.y[0] = (int2)(0, 0);
}
//
// X86-LABEL: define spir_kernel void @KernelTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
// AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
// AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
// AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
// AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
kernel void KernelTwoMember(struct StructTwoMember u) {
FuncTwoMember(u);
}
//
// X86-LABEL: define spir_kernel void @KernelLargeTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// X86-NEXT: [[ENTRY:.*:]]
// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
// X86-NEXT: ret void
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember(
// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] {
// SPIR-NEXT: [[ENTRY:.*:]]
// SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// SPIR-NEXT: ret void
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// AMDGCN30-GVAR-NEXT: ret void
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
// AMDGCN30-NEXT: ret void
//
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
FuncLargeTwoMember(u);
}
//.
// X86: [[META4]] = !{i32 1, i32 1}
// X86: [[META5]] = !{!"none", !"none"}
// X86: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
// X86: [[META7]] = !{!"", !""}
// X86: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
// X86: [[META9]] = !{}
// X86: [[META10]] = !{i32 0}
// X86: [[META11]] = !{!"none"}
// X86: [[META12]] = !{!"struct StructOneMember"}
// X86: [[META13]] = !{!""}
// X86: [[META14]] = !{i32 1}
// X86: [[META15]] = !{!"struct StructOneMember*"}
// X86: [[META16]] = !{!"struct LargeStructOneMember"}
// X86: [[META17]] = !{!"struct StructTwoMember"}
// X86: [[META18]] = !{!"struct LargeStructTwoMember"}
//.
// AMDGCN: [[META4]] = !{i32 1, i32 1}
// AMDGCN: [[META5]] = !{!"none", !"none"}
// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
// AMDGCN: [[META7]] = !{!"", !""}
// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
// AMDGCN: [[META9]] = !{}
// AMDGCN: [[META10]] = !{i32 0}
// AMDGCN: [[META11]] = !{!"none"}
// AMDGCN: [[META12]] = !{!"struct StructOneMember"}
// AMDGCN: [[META13]] = !{!""}
// AMDGCN: [[META14]] = !{i32 1}
// AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
// AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}
//.
// AMDGCN20: [[META4]] = !{i32 1, i32 1}
// AMDGCN20: [[META5]] = !{!"none", !"none"}
// AMDGCN20: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
// AMDGCN20: [[META7]] = !{!"", !""}
// AMDGCN20: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
// AMDGCN20: [[META9]] = !{}
// AMDGCN20: [[META10]] = !{i32 0}
// AMDGCN20: [[META11]] = !{!"none"}
// AMDGCN20: [[META12]] = !{!"struct StructOneMember"}
// AMDGCN20: [[META13]] = !{!""}
// AMDGCN20: [[META14]] = !{i32 1}
// AMDGCN20: [[META15]] = !{!"struct StructOneMember*"}
// AMDGCN20: [[META16]] = !{!"struct LargeStructOneMember"}
// AMDGCN20: [[META17]] = !{!"struct StructTwoMember"}
// AMDGCN20: [[META18]] = !{!"struct LargeStructTwoMember"}
//.
// SPIR: [[META3]] = !{i32 1, i32 1}
// SPIR: [[META4]] = !{!"none", !"none"}
// SPIR: [[META5]] = !{!"Mat3X3*", !"Mat4X4*"}
// SPIR: [[META6]] = !{!"", !""}
// SPIR: [[META7]] = !{!"Mat32X32*", !"Mat64X64*"}
// SPIR: [[META8]] = !{}
// SPIR: [[META9]] = !{i32 0}
// SPIR: [[META10]] = !{!"none"}
// SPIR: [[META11]] = !{!"struct StructOneMember"}
// SPIR: [[META12]] = !{!""}
// SPIR: [[META13]] = !{i32 1}
// SPIR: [[META14]] = !{!"struct StructOneMember*"}
// SPIR: [[META15]] = !{!"struct LargeStructOneMember"}
// SPIR: [[META16]] = !{!"struct StructTwoMember"}
// SPIR: [[META17]] = !{!"struct LargeStructTwoMember"}
//.
// AMDGCN30-GVAR: [[META4]] = !{i32 1, i32 1}
// AMDGCN30-GVAR: [[META5]] = !{!"none", !"none"}
// AMDGCN30-GVAR: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
// AMDGCN30-GVAR: [[META7]] = !{!"", !""}
// AMDGCN30-GVAR: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
// AMDGCN30-GVAR: [[META9]] = !{}
// AMDGCN30-GVAR: [[META10]] = !{i32 0}
// AMDGCN30-GVAR: [[META11]] = !{!"none"}
// AMDGCN30-GVAR: [[META12]] = !{!"struct StructOneMember"}
// AMDGCN30-GVAR: [[META13]] = !{!""}
// AMDGCN30-GVAR: [[META14]] = !{i32 1}
// AMDGCN30-GVAR: [[META15]] = !{!"struct StructOneMember*"}
// AMDGCN30-GVAR: [[META16]] = !{!"struct LargeStructOneMember"}
// AMDGCN30-GVAR: [[META17]] = !{!"struct StructTwoMember"}
// AMDGCN30-GVAR: [[META18]] = !{!"struct LargeStructTwoMember"}
//.
// AMDGCN30: [[META4]] = !{i32 1, i32 1}
// AMDGCN30: [[META5]] = !{!"none", !"none"}
// AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
// AMDGCN30: [[META7]] = !{!"", !""}
// AMDGCN30: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
// AMDGCN30: [[META9]] = !{}
// AMDGCN30: [[META10]] = !{i32 0}
// AMDGCN30: [[META11]] = !{!"none"}
// AMDGCN30: [[META12]] = !{!"struct StructOneMember"}
// AMDGCN30: [[META13]] = !{!""}
// AMDGCN30: [[META14]] = !{i32 1}
// AMDGCN30: [[META15]] = !{!"struct StructOneMember*"}
// AMDGCN30: [[META16]] = !{!"struct LargeStructOneMember"}
// AMDGCN30: [[META17]] = !{!"struct StructTwoMember"}
// AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"}
//.