2017-01-25 21:27:24 +00:00
|
|
|
VERS1.0 {
|
|
|
|
global:
|
2024-02-22 12:01:52 -06:00
|
|
|
__tgt_rtl_init;
|
|
|
|
__tgt_rtl_deinit;
|
[OpenMP][libomptarget] Enable requires flags for target libraries.
Summary:
Target link variables are currently implemented by creating a copy of the variables on the device side and unified memory never gets exploited.
When the prgram uses the:
```
#pragma omp requires unified_shared_memory
```
directive in conjunction with a declare target link, the linked variable is no longer allocated on the device and the host version is used instead.
This behavior is overridden by performing an explicit mapping.
A Clang side patch is required.
Reviewers: ABataev, AlexEichenberger, grokos, Hahnfeld
Reviewed By: AlexEichenberger, grokos, Hahnfeld
Subscribers: Hahnfeld, jfb, guansong, jdoerfert, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D60223
llvm-svn: 361294
2019-05-21 19:35:02 +00:00
|
|
|
__tgt_register_requires;
|
2017-01-25 21:27:24 +00:00
|
|
|
__tgt_register_lib;
|
|
|
|
__tgt_unregister_lib;
|
2021-07-27 22:38:27 -04:00
|
|
|
__tgt_init_all_rtls;
|
2017-01-25 21:27:24 +00:00
|
|
|
__tgt_target_data_begin;
|
|
|
|
__tgt_target_data_end;
|
|
|
|
__tgt_target_data_update;
|
|
|
|
__tgt_target;
|
|
|
|
__tgt_target_teams;
|
|
|
|
__tgt_target_data_begin_nowait;
|
|
|
|
__tgt_target_data_end_nowait;
|
|
|
|
__tgt_target_data_update_nowait;
|
|
|
|
__tgt_target_nowait;
|
|
|
|
__tgt_target_teams_nowait;
|
2020-07-15 13:24:03 -07:00
|
|
|
__tgt_target_data_begin_mapper;
|
|
|
|
__tgt_target_data_end_mapper;
|
|
|
|
__tgt_target_data_update_mapper;
|
|
|
|
__tgt_target_mapper;
|
|
|
|
__tgt_target_teams_mapper;
|
|
|
|
__tgt_target_data_begin_nowait_mapper;
|
|
|
|
__tgt_target_data_end_nowait_mapper;
|
|
|
|
__tgt_target_data_update_nowait_mapper;
|
|
|
|
__tgt_target_nowait_mapper;
|
|
|
|
__tgt_target_teams_nowait_mapper;
|
2022-06-23 14:57:59 -04:00
|
|
|
__tgt_target_kernel;
|
|
|
|
__tgt_target_kernel_nowait;
|
2022-12-14 13:46:23 -03:00
|
|
|
__tgt_target_nowait_query;
|
2023-01-17 15:35:44 -08:00
|
|
|
__tgt_target_kernel_replay;
|
2023-07-19 10:32:07 -07:00
|
|
|
__tgt_activate_record_replay;
|
2019-08-04 04:18:28 +00:00
|
|
|
__tgt_mapper_num_components;
|
|
|
|
__tgt_push_mapper_component;
|
2021-03-10 13:25:33 -05:00
|
|
|
__kmpc_push_target_tripcount;
|
|
|
|
__kmpc_push_target_tripcount_mapper;
|
2024-03-18 20:09:20 +01:00
|
|
|
ompx_dump_mapping_tables;
|
2023-01-11 22:05:33 -05:00
|
|
|
omp_get_mapped_ptr;
|
2017-01-25 21:27:24 +00:00
|
|
|
omp_get_num_devices;
|
2022-06-22 10:05:34 -05:00
|
|
|
omp_get_device_num;
|
2017-01-25 21:27:24 +00:00
|
|
|
omp_get_initial_device;
|
|
|
|
omp_target_alloc;
|
|
|
|
omp_target_free;
|
|
|
|
omp_target_is_present;
|
|
|
|
omp_target_memcpy;
|
|
|
|
omp_target_memcpy_rect;
|
2023-03-28 10:32:11 -04:00
|
|
|
omp_target_memcpy_async;
|
|
|
|
omp_target_memcpy_rect_async;
|
2023-10-19 15:29:36 +02:00
|
|
|
omp_target_memset;
|
|
|
|
omp_target_memset_async;
|
2017-01-25 21:27:24 +00:00
|
|
|
omp_target_associate_ptr;
|
|
|
|
omp_target_disassociate_ptr;
|
2021-03-03 11:48:32 -08:00
|
|
|
llvm_omp_target_alloc_host;
|
|
|
|
llvm_omp_target_alloc_shared;
|
|
|
|
llvm_omp_target_alloc_device;
|
2022-08-31 15:55:14 -05:00
|
|
|
llvm_omp_target_free_host;
|
|
|
|
llvm_omp_target_free_shared;
|
|
|
|
llvm_omp_target_free_device;
|
2022-04-06 15:56:19 -04:00
|
|
|
llvm_omp_target_dynamic_shared_alloc;
|
2023-01-13 12:18:49 -06:00
|
|
|
llvm_omp_target_lock_mem;
|
|
|
|
llvm_omp_target_unlock_mem;
|
2021-04-21 17:31:09 -04:00
|
|
|
__tgt_set_info_flag;
|
2021-07-27 21:47:40 -04:00
|
|
|
__tgt_print_device_info;
|
2022-01-27 15:15:25 -05:00
|
|
|
omp_get_interop_ptr;
|
|
|
|
omp_get_interop_str;
|
|
|
|
omp_get_interop_int;
|
|
|
|
omp_get_interop_name;
|
|
|
|
omp_get_interop_type_desc;
|
|
|
|
__tgt_interop_init;
|
|
|
|
__tgt_interop_use;
|
|
|
|
__tgt_interop_destroy;
|
[Offload][CUDA] Allow CUDA kernels to use LLVM/Offload (#94549)
Through the new `-foffload-via-llvm` flag, CUDA kernels can now be
lowered to the LLVM/Offload API. On the Clang side, this is simply done
by using the OpenMP offload toolchain and emitting calls to `llvm*`
functions to orchestrate the kernel launch rather than `cuda*`
functions. These `llvm*` functions are implemented on top of the
existing LLVM/Offload API.
As we are about to redefine the Offload API, this wil help us in the
design process as a second offload language.
We do not support any CUDA APIs yet, however, we could:
https://www.osti.gov/servlets/purl/1892137
For proper host execution we need to resurrect/rebase
https://tianshilei.me/wp-content/uploads/2021/12/llpp-2021.pdf
(which was designed for debugging).
```
❯❯❯ cat test.cu
extern "C" {
void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
void llvm_omp_target_free_shared(void *DevicePtr, int DeviceNum);
}
__global__ void square(int *A) { *A = 42; }
int main(int argc, char **argv) {
int DevNo = 0;
int *Ptr = reinterpret_cast<int *>(llvm_omp_target_alloc_shared(4, DevNo));
*Ptr = 7;
printf("Ptr %p, *Ptr %i\n", Ptr, *Ptr);
square<<<1, 1>>>(Ptr);
printf("Ptr %p, *Ptr %i\n", Ptr, *Ptr);
llvm_omp_target_free_shared(Ptr, DevNo);
}
❯❯❯ clang++ test.cu -O3 -o test123 -foffload-via-llvm --offload-arch=native
❯❯❯ llvm-objdump --offloading test123
test123: file format elf64-x86-64
OFFLOADING IMAGE [0]:
kind elf
arch gfx90a
triple amdgcn-amd-amdhsa
producer openmp
❯❯❯ LIBOMPTARGET_INFO=16 ./test123
Ptr 0x155448ac8000, *Ptr 7
Ptr 0x155448ac8000, *Ptr 42
```
2024-08-12 17:44:58 -07:00
|
|
|
__llvmPushCallConfiguration;
|
|
|
|
__llvmPopCallConfiguration;
|
|
|
|
llvmLaunchKernel;
|
2017-01-25 21:27:24 +00:00
|
|
|
local:
|
|
|
|
*;
|
|
|
|
};
|