llvm-project/clang/test/OpenMP/interop_irbuilder.cpp
Johannes Doerfert 16a385ba21 [OpenMP] Modernize the kernel launching interface and APIs
We already created a versioned `__tgt_kernel_arguments` struct but it
was only briefly used and its content was passed in isolation anyway.
This makes it hard to add more information in the future. With this
patch we fully embrace the struct as a means to pass information from the
compiler to the plugin as part of a kernel launch.

The patch also extends and renames the struct, bumping the version
number to 2. Version 1 entries are auto-upgraded. This is in preparation
for "bare" kernel launches, per-kernel dynamic shared memory, CUDA/HIP
lowering, etc.

The `__tgt_target_kernel_nowait` interface was deprecated as it was
unused. Once we actually implement support for something like that, we
can add an appropriate API.

Note: Only plugins with the `launch_kernel` interface are now supported.
      That means that a new clang won't be able to use an old runtime.
      An old clang can still use the new runtime since the libomptarget
      interface did not change.

Differential Revision: https://reviews.llvm.org/D141232
2023-01-21 11:16:21 -08:00

195 lines
13 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs
// RUN: %clang_cc1 -verify -triple x86_64-unknown-linux -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
// Interop handle type used by the directives in this test. It is declared
// here directly as an untyped pointer; no header is included in this file.
typedef void *omp_interop_t;
// Exercises the init / use / destroy forms of `#pragma omp interop` on a
// function-local interop variable. The autogenerated FileCheck assertions for
// @_Z5test1v later in this file pin the runtime call emitted for each
// directive, in source order, so the order of the declarations and directives
// below must not be changed without regenerating those assertions.
void test1() {
// Device number passed through the device() clauses below.
int device_id = 4;
// Objects named in the depend(in : ...) clauses; they are never initialized
// because only their addresses are recorded in the dependence array.
int D0, D1;
omp_interop_t interop;
// init(target), no device clause: lowered to __tgt_interop_init with interop
// type argument 1 and device argument -1.
#pragma omp interop init(target \
: interop)
// init(targetsync), no device clause: same call with interop type argument 2.
#pragma omp interop init(targetsync \
: interop)
// init(target) with device(device_id): the device argument is the value
// loaded from device_id instead of -1.
#pragma omp interop init(target \
: interop) device(device_id)
// init(targetsync) with device(device_id).
#pragma omp interop init(targetsync \
: interop) device(device_id)
// use with two `in` dependences and nowait: a two-entry kmp_depend_info array
// is filled in and passed to __tgt_interop_use; the final i32 argument is 1
// here (and 0 for the destroy below, which has no nowait clause).
#pragma omp interop use(interop) depend(in \
: D0, D1) nowait
// destroy with the same two dependences but without nowait: a second
// dependence array is built and passed to __tgt_interop_destroy.
#pragma omp interop destroy(interop) depend(in \
: D0, D1)
}
// Holder for an interop object stored as a data member, so the directives in
// member_test() operate on a member reached through `this` rather than on a
// local variable.
struct S {
// Interop handle; the only field, so it sits at field index 0 in the
// generated getelementptr instructions.
omp_interop_t interop;
// Runs the same directive sequence as test1() against the member above.
void member_test();
};
// Member-function counterpart of test1(): the same six interop directives,
// but `interop` here names the data member S::interop. The FileCheck
// assertions for @_ZN1S11member_testEv show the member address being
// re-derived from `this` (a getelementptr on the struct) before each runtime
// call. Statement order is pinned by those assertions.
void S::member_test() {
// Device number passed through the device() clauses below.
int device_id = 4;
// Objects named in the depend(in : ...) clauses; only their addresses are used.
int D0, D1;
// init(target), no device clause: interop type argument 1, device argument -1.
#pragma omp interop init(target \
: interop)
// init(targetsync), no device clause: interop type argument 2.
#pragma omp interop init(targetsync \
: interop)
// init(target) with device(device_id): device argument is loaded from device_id.
#pragma omp interop init(target \
: interop) device(device_id)
// init(targetsync) with device(device_id).
#pragma omp interop init(targetsync \
: interop) device(device_id)
// use with two `in` dependences and nowait: lowered to __tgt_interop_use with
// a two-entry dependence array and a final i32 argument of 1.
#pragma omp interop use(interop) depend(in \
: D0, D1) nowait
// destroy with two `in` dependences, no nowait: lowered to
// __tgt_interop_destroy with a final i32 argument of 0.
#pragma omp interop destroy(interop) depend(in \
: D0, D1)
}
// CHECK-LABEL: @_Z5test1v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DEVICE_ID:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[D0:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[D1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[INTEROP:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8
// CHECK-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8
// CHECK-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8
// CHECK-NEXT: store i32 4, ptr [[DEVICE_ID]], align 4
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[INTEROP]], i32 1, i32 -1, i64 0, ptr null, i32 0)
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], ptr [[INTEROP]], i32 2, i32 -1, i64 0, ptr null, i32 0)
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DEVICE_ID]], align 4
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], ptr [[INTEROP]], i32 1, i32 [[TMP0]], i64 0, ptr null, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DEVICE_ID]], align 4
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], ptr [[INTEROP]], i32 2, i32 [[TMP1]], i64 0, ptr null, i32 0)
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0
// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[D0]] to i64
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP4]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP4]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP6]], align 8
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP4]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP7]], align 8
// CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[D1]] to i64
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP12]], align 8
// CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR]], align 8
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_use(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4]], ptr [[INTEROP]], i32 -1, i32 2, ptr [[TMP2]], i32 1)
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR5]], i64 0, i64 0
// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[D0]] to i64
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 0
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP17]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP18]], align 8
// CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[D1]] to i64
// CHECK-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 1
// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP19]], ptr [[TMP21]], align 8
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP22]], align 8
// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP23]], align 8
// CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR6]], align 8
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_destroy(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], ptr [[INTEROP]], i32 -1, i32 2, ptr [[TMP13]], i32 0)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: @_ZN1S11member_testEv(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DEVICE_ID:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[D0:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[D1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8
// CHECK-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[DOTDEP_ARR_ADDR10:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8
// CHECK-NEXT: [[DEP_COUNTER_ADDR11:%.*]] = alloca i64, align 8
// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: store i32 4, ptr [[DEVICE_ID]], align 4
// CHECK-NEXT: [[INTEROP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[INTEROP]], i32 1, i32 -1, i64 0, ptr null, i32 0)
// CHECK-NEXT: [[INTEROP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], ptr [[INTEROP2]], i32 2, i32 -1, i64 0, ptr null, i32 0)
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DEVICE_ID]], align 4
// CHECK-NEXT: [[INTEROP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5]], ptr [[INTEROP4]], i32 1, i32 [[TMP0]], i64 0, ptr null, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DEVICE_ID]], align 4
// CHECK-NEXT: [[INTEROP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_init(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], ptr [[INTEROP6]], i32 2, i32 [[TMP1]], i64 0, ptr null, i32 0)
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0
// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[D0]] to i64
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP4]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP4]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP6]], align 8
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP4]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP7]], align 8
// CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[D1]] to i64
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP12]], align 8
// CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR]], align 8
// CHECK-NEXT: [[INTEROP8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_use(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], ptr [[INTEROP8]], i32 -1, i32 2, ptr [[TMP2]], i32 1)
// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR10]], i64 0, i64 0
// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[D0]] to i64
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 0
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP17]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP18]], align 8
// CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[D1]] to i64
// CHECK-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 1
// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 0
// CHECK-NEXT: store i64 [[TMP19]], ptr [[TMP21]], align 8
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 1
// CHECK-NEXT: store i64 4, ptr [[TMP22]], align 8
// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 2
// CHECK-NEXT: store i8 1, ptr [[TMP23]], align 8
// CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR11]], align 8
// CHECK-NEXT: [[INTEROP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__tgt_interop_destroy(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], ptr [[INTEROP12]], i32 -1, i32 2, ptr [[TMP13]], i32 0)
// CHECK-NEXT: ret void
//