diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports index 0667d53c35a1..3983dae80c9f 100644 --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -532,6 +532,18 @@ kmp_set_disp_num_buffers 890 omp_get_device_num 896 omp_init_allocator 897 omp_destroy_allocator 898 + omp_get_devices_memspace 810 + omp_get_device_memspace 811 + omp_get_devices_and_host_memspace 812 + omp_get_device_and_host_memspace 813 + omp_get_devices_all_memspace 814 + omp_get_devices_allocator 815 + omp_get_device_allocator 816 + omp_get_devices_and_host_allocator 817 + omp_get_device_and_host_allocator 818 + omp_get_devices_all_allocator 819 + omp_get_memspace_num_resources 820 + omp_get_submemspace 821 %ifndef stub __kmpc_set_default_allocator __kmpc_get_default_allocator @@ -592,6 +604,7 @@ kmp_set_disp_num_buffers 890 llvm_omp_target_host_mem_space DATA llvm_omp_target_shared_mem_space DATA llvm_omp_target_device_mem_space DATA + omp_null_mem_space DATA %ifndef stub # Ordinals between 900 and 999 are reserved diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index 82f9d07657ff..74f385feb3ea 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -339,7 +339,13 @@ omp_atk_fallback = 5, omp_atk_fb_data = 6, omp_atk_pinned = 7, - omp_atk_partition = 8 + omp_atk_partition = 8, + omp_atk_pin_device = 9, + omp_atk_preferred_device = 10, + omp_atk_device_access = 11, + omp_atk_target_access = 12, + omp_atk_atomic_scope = 13, + omp_atk_part_size = 14 } omp_alloctrait_key_t; typedef enum { @@ -350,7 +356,7 @@ omp_atv_serialized = 5, omp_atv_sequential = omp_atv_serialized, // (deprecated) omp_atv_private = 6, - omp_atv_all = 7, + omp_atv_device = 7, omp_atv_thread = 8, omp_atv_pteam = 9, omp_atv_cgroup = 10, @@ -361,7 +367,11 @@ omp_atv_environment = 15, omp_atv_nearest = 16, omp_atv_blocked = 17, - omp_atv_interleaved = 18 + omp_atv_interleaved = 18, + omp_atv_all = 19, + 
omp_atv_single = 20, + omp_atv_multiple = 21, + omp_atv_memspace = 22 } omp_alloctrait_value_t; #define omp_atv_default ((omp_uintptr_t)-1) @@ -387,6 +397,7 @@ extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc; typedef omp_uintptr_t omp_memspace_handle_t; + extern __KMP_IMP omp_memspace_handle_t const omp_null_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space; @@ -422,7 +433,8 @@ typedef enum omp_memspace_handle_t # endif { - omp_default_mem_space = 0, + omp_null_mem_space = 0, + omp_default_mem_space = 99, omp_large_cap_mem_space = 1, omp_const_mem_space = 2, omp_high_bw_mem_space = 3, @@ -463,6 +475,20 @@ extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a); # endif + /* OpenMP TR11 routines to get memory spaces and allocators */ + extern omp_memspace_handle_t omp_get_devices_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace); + extern omp_memspace_handle_t omp_get_device_memspace(int dev, omp_memspace_handle_t memspace); + extern omp_memspace_handle_t omp_get_devices_and_host_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace); + extern omp_memspace_handle_t omp_get_device_and_host_memspace(int dev, omp_memspace_handle_t memspace); + extern omp_memspace_handle_t omp_get_devices_all_memspace(omp_memspace_handle_t memspace); + extern omp_allocator_handle_t omp_get_devices_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace); + extern omp_allocator_handle_t omp_get_device_allocator(int dev, omp_memspace_handle_t memspace); + extern omp_allocator_handle_t omp_get_devices_and_host_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace); + extern omp_allocator_handle_t omp_get_device_and_host_allocator(int dev, omp_memspace_handle_t memspace); + extern omp_allocator_handle_t 
omp_get_devices_all_allocator(omp_memspace_handle_t memspace); + extern int omp_get_memspace_num_resources(omp_memspace_handle_t memspace); + extern omp_memspace_handle_t omp_get_submemspace(omp_memspace_handle_t memspace, int num_resources, int *resources); + /* OpenMP 5.0 Affinity Format */ extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var index 5133915c7d8c..3463b698291e 100644 --- a/openmp/runtime/src/include/omp_lib.F90.var +++ b/openmp/runtime/src/include/omp_lib.F90.var @@ -145,6 +145,12 @@ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_fb_data = 6 integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_pinned = 7 integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_partition = 8 + integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_pin_device = 9 + integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_preferred_device = 10 + integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_device_access = 11 + integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_target_access = 12 + integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_atomic_scope = 13 + integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_part_size = 14 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_default = -1 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_false = 0 @@ -154,7 +160,7 @@ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_serialized = 5 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_sequential = omp_atv_serialized integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_private = 6 - integer (kind=omp_alloctrait_val_kind), parameter, public :: 
omp_atv_all = 7 + integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_device = 7 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_thread = 8 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_pteam = 9 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_cgroup = 10 @@ -166,6 +172,10 @@ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_nearest = 16 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_blocked = 17 integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_interleaved = 18 + integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_all = 19 + integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_single = 20 + integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_multiple = 21 + integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_memspace = 22 integer (kind=omp_allocator_handle_kind), parameter, public :: omp_null_allocator = 0 integer (kind=omp_allocator_handle_kind), parameter, public :: omp_default_mem_alloc = 1 @@ -180,7 +190,8 @@ integer (kind=omp_allocator_handle_kind), parameter, public :: llvm_omp_target_shared_mem_alloc = 101 integer (kind=omp_allocator_handle_kind), parameter, public :: llvm_omp_target_device_mem_alloc = 102 - integer (kind=omp_memspace_handle_kind), parameter, public :: omp_default_mem_space = 0 + integer (kind=omp_memspace_handle_kind), parameter, public :: omp_null_mem_space = 0 + integer (kind=omp_memspace_handle_kind), parameter, public :: omp_default_mem_space = 99 integer (kind=omp_memspace_handle_kind), parameter, public :: omp_large_cap_mem_space = 1 integer (kind=omp_memspace_handle_kind), parameter, public :: omp_const_mem_space = 2 integer (kind=omp_memspace_handle_kind), parameter, public :: omp_high_bw_mem_space = 3 @@ -802,6 +813,97 @@ logical (kind=omp_logical_kind) omp_in_explicit_task end function omp_in_explicit_task + function 
omp_get_devices_memspace(ndevs, devs, memspace) + use omp_lib_kinds + integer(omp_memspace_handle_kind) :: omp_get_devices_memspace + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_memspace + + function omp_get_device_memspace(dev, memspace) + use omp_lib_kinds + integer(omp_memspace_handle_kind) :: omp_get_device_memspace + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_memspace + + function omp_get_devices_and_host_memspace(ndevs, devs, memspace) + use omp_lib_kinds + integer(omp_memspace_handle_kind) :: & + omp_get_devices_and_host_memspace + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_and_host_memspace + + function omp_get_device_and_host_memspace(dev, memspace) + use omp_lib_kinds + integer(omp_memspace_handle_kind) :: & + omp_get_device_and_host_memspace + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_and_host_memspace + + function omp_get_devices_all_memspace(memspace) + use omp_lib_kinds + integer(omp_memspace_handle_kind) :: omp_get_devices_all_memspace + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_all_memspace + + function omp_get_devices_allocator(ndevs, devs, memspace) + use omp_lib_kinds + integer(omp_allocator_handle_kind) :: omp_get_devices_allocator + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_allocator + + function omp_get_device_allocator(dev, memspace) + use omp_lib_kinds + integer(omp_allocator_handle_kind) :: omp_get_device_allocator + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function 
omp_get_device_allocator + + function omp_get_devices_and_host_allocator(ndevs, devs, memspace) + use omp_lib_kinds + integer(omp_allocator_handle_kind) :: & + omp_get_devices_and_host_allocator + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_and_host_allocator + + function omp_get_device_and_host_allocator(dev, memspace) + use omp_lib_kinds + integer(omp_allocator_handle_kind) :: & + omp_get_device_and_host_allocator + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_and_host_allocator + + function omp_get_devices_all_allocator(memspace) + use omp_lib_kinds + integer(omp_allocator_handle_kind) :: & + omp_get_devices_all_allocator + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_all_allocator + + function omp_get_memspace_num_resources(memspace) + use omp_lib_kinds + integer omp_get_memspace_num_resources + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_memspace_num_resources + + function omp_get_submemspace(memspace, num_resources, resources) + use omp_lib_kinds + integer(omp_memspace_handle_kind) omp_get_submemspace + integer(omp_memspace_handle_kind), intent(in) :: memspace + integer, intent(in) :: num_resources + integer, intent(in) :: resources(*) + end function omp_get_submemspace + ! *** ! *** kmp_* entry points ! 
*** diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var index db1dc889d129..5793a3ac2e68 100644 --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -151,6 +151,18 @@ parameter(omp_atk_pinned=7) integer(kind=omp_alloctrait_key_kind)omp_atk_partition parameter(omp_atk_partition=8) + integer(kind=omp_alloctrait_key_kind)omp_atk_pin_device + parameter(omp_atk_pin_device=9) + integer(kind=omp_alloctrait_key_kind)omp_atk_preferred_device + parameter(omp_atk_preferred_device=10) + integer(kind=omp_alloctrait_key_kind)omp_atk_device_access + parameter(omp_atk_device_access=11) + integer(kind=omp_alloctrait_key_kind)omp_atk_target_access + parameter(omp_atk_target_access=12) + integer(kind=omp_alloctrait_key_kind)omp_atk_atomic_scope + parameter(omp_atk_atomic_scope=13) + integer(kind=omp_alloctrait_key_kind)omp_atk_part_size + parameter(omp_atk_part_size=14) integer(kind=omp_alloctrait_val_kind)omp_atv_default parameter(omp_atv_default=-1) @@ -170,8 +182,8 @@ parameter(omp_atv_sequential=5) integer(kind=omp_alloctrait_val_kind)omp_atv_private parameter(omp_atv_private=6) - integer(kind=omp_alloctrait_val_kind)omp_atv_all - parameter(omp_atv_all=7) + integer(kind=omp_alloctrait_val_kind)omp_atv_device + parameter(omp_atv_device=7) integer(kind=omp_alloctrait_val_kind)omp_atv_thread parameter(omp_atv_thread=8) integer(kind=omp_alloctrait_val_kind)omp_atv_pteam @@ -194,6 +206,14 @@ parameter(omp_atv_blocked=17) integer(kind=omp_alloctrait_val_kind)omp_atv_interleaved parameter(omp_atv_interleaved=18) + integer(kind=omp_alloctrait_val_kind)omp_atv_all + parameter(omp_atv_all=19) + integer(kind=omp_alloctrait_val_kind)omp_atv_single + parameter(omp_atv_single=20) + integer(kind=omp_alloctrait_val_kind)omp_atv_multiple + parameter(omp_atv_multiple=21) + integer(kind=omp_alloctrait_val_kind)omp_atv_memspace + parameter(omp_atv_memspace=22) type omp_alloctrait integer 
(kind=omp_alloctrait_key_kind) key @@ -225,8 +245,10 @@ integer(omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc parameter(llvm_omp_target_device_mem_alloc=102) + integer(kind=omp_memspace_handle_kind)omp_null_mem_space + parameter(omp_null_mem_space=0) integer(kind=omp_memspace_handle_kind)omp_default_mem_space - parameter(omp_default_mem_space=0) + parameter(omp_default_mem_space=99) integer(kind=omp_memspace_handle_kind)omp_large_cap_mem_space parameter(omp_large_cap_mem_space=1) integer(kind=omp_memspace_handle_kind)omp_const_mem_space @@ -863,6 +885,98 @@ logical (kind=omp_logical_kind) omp_in_explicit_task end function omp_in_explicit_task + function omp_get_devices_memspace(ndevs, devs, memspace) + import + integer(omp_memspace_handle_kind) :: omp_get_devices_memspace + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_memspace + + function omp_get_device_memspace(dev, memspace) + import + integer(omp_memspace_handle_kind) :: omp_get_device_memspace + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_memspace + + function omp_get_devices_and_host_memspace(ndevs,devs,memspace) + import + integer(omp_memspace_handle_kind) :: & + & omp_get_devices_and_host_memspace + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_and_host_memspace + + function omp_get_device_and_host_memspace(dev, memspace) + import + integer(omp_memspace_handle_kind) :: & + & omp_get_device_and_host_memspace + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_and_host_memspace + + function omp_get_devices_all_memspace(memspace) + import + integer(omp_memspace_handle_kind)::omp_get_devices_all_memspace + integer(omp_memspace_handle_kind), 
intent(in) :: memspace + end function omp_get_devices_all_memspace + + function omp_get_devices_allocator(ndevs, devs, memspace) + import + integer(omp_allocator_handle_kind)::omp_get_devices_allocator + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_allocator + + function omp_get_device_allocator(dev, memspace) + import + integer(omp_allocator_handle_kind) :: omp_get_device_allocator + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_allocator + + function omp_get_devices_and_host_allocator(ndevs,devs,memspace) + import + integer(omp_allocator_handle_kind) :: & + & omp_get_devices_and_host_allocator + integer, intent(in) :: ndevs + integer, intent(in) :: devs(*) + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_and_host_allocator + + function omp_get_device_and_host_allocator(dev, memspace) + import + integer(omp_allocator_handle_kind) :: & + & omp_get_device_and_host_allocator + integer, intent(in) :: dev + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_device_and_host_allocator + + function omp_get_devices_all_allocator(memspace) + import + integer(omp_allocator_handle_kind) :: & + & omp_get_devices_all_allocator + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_devices_all_allocator + + function omp_get_memspace_num_resources(memspace) + import + integer omp_get_memspace_num_resources + integer(omp_memspace_handle_kind), intent(in) :: memspace + end function omp_get_memspace_num_resources + + function omp_get_submemspace(memspace, num_resources, resources) + import + integer(omp_memspace_handle_kind) omp_get_submemspace + integer(omp_memspace_handle_kind), intent(in) :: memspace + integer, intent(in) :: num_resources + integer, intent(in) :: resources(*) + end function 
omp_get_submemspace + + ! *** ! *** kmp_* entry points ! *** diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 28a5522f3a58..d5d667c32c64 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1046,7 +1046,13 @@ typedef enum { omp_atk_fallback = 5, omp_atk_fb_data = 6, omp_atk_pinned = 7, - omp_atk_partition = 8 + omp_atk_partition = 8, + omp_atk_pin_device = 9, + omp_atk_preferred_device = 10, + omp_atk_device_access = 11, + omp_atk_target_access = 12, + omp_atk_atomic_scope = 13, + omp_atk_part_size = 14 } omp_alloctrait_key_t; typedef enum { @@ -1057,7 +1063,7 @@ typedef enum { omp_atv_serialized = 5, omp_atv_sequential = omp_atv_serialized, // (deprecated) omp_atv_private = 6, - omp_atv_all = 7, + omp_atv_device = 7, omp_atv_thread = 8, omp_atv_pteam = 9, omp_atv_cgroup = 10, @@ -1068,11 +1074,16 @@ typedef enum { omp_atv_environment = 15, omp_atv_nearest = 16, omp_atv_blocked = 17, - omp_atv_interleaved = 18 + omp_atv_interleaved = 18, + omp_atv_all = 19, + omp_atv_single = 20, + omp_atv_multiple = 21, + omp_atv_memspace = 22 } omp_alloctrait_value_t; #define omp_atv_default ((omp_uintptr_t)-1) typedef void *omp_memspace_handle_t; +extern omp_memspace_handle_t const omp_null_mem_space; extern omp_memspace_handle_t const omp_default_mem_space; extern omp_memspace_handle_t const omp_large_cap_mem_space; extern omp_memspace_handle_t const omp_const_mem_space; @@ -1081,6 +1092,7 @@ extern omp_memspace_handle_t const omp_low_lat_mem_space; extern omp_memspace_handle_t const llvm_omp_target_host_mem_space; extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space; extern omp_memspace_handle_t const llvm_omp_target_device_mem_space; +extern omp_memspace_handle_t const kmp_max_mem_space; typedef struct { omp_alloctrait_key_t key; @@ -1109,8 +1121,15 @@ extern omp_allocator_handle_t __kmp_def_allocator; extern int __kmp_memkind_available; extern bool __kmp_hwloc_available; -typedef omp_memspace_handle_t kmp_memspace_t; 
// placeholder +/// Memory space information is shared with offload runtime. +typedef struct kmp_memspace_t { + omp_memspace_handle_t memspace; // predefined input memory space + int num_resources = 0; // number of available resources + int *resources = nullptr; // available resources + kmp_memspace_t *next = nullptr; // next memory space handle +} kmp_memspace_t; +/// Memory allocator information is shared with offload runtime. typedef struct kmp_allocator_t { omp_memspace_handle_t memspace; void **memkind; // pointer to memkind @@ -1120,6 +1139,12 @@ typedef struct kmp_allocator_t { kmp_uint64 pool_size; kmp_uint64 pool_used; bool pinned; + omp_alloctrait_value_t partition; + int pin_device; + int preferred_device; + omp_alloctrait_value_t target_access; + omp_alloctrait_value_t atomic_scope; + size_t part_size; #if KMP_USE_HWLOC omp_alloctrait_value_t membind; #endif @@ -1155,6 +1180,21 @@ extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); extern void __kmp_init_memkind(); extern void __kmp_fini_memkind(); extern void __kmp_init_target_mem(); +extern void __kmp_fini_target_mem(); + +// OpenMP 6.0 (TR11) Memory Management support +extern omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, + const int *devs, + omp_memspace_handle_t, + int host); +extern omp_allocator_handle_t __kmp_get_devices_allocator(int ndevs, + const int *devs, + omp_memspace_handle_t, + int host); +extern int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace); +extern omp_memspace_handle_t +__kmp_get_submemspace(omp_memspace_handle_t memspace, int num_resources, + int *resources); /* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp index 783d9ffe88aa..801cd06c9550 100644 --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -1265,15 +1265,190 @@ static void *(*kmp_target_free_host)(void *ptr, int device); static
void *(*kmp_target_free_shared)(void *ptr, int device); static void *(*kmp_target_free_device)(void *ptr, int device); static bool __kmp_target_mem_available; + #define KMP_IS_TARGET_MEM_SPACE(MS) \ (MS == llvm_omp_target_host_mem_space || \ MS == llvm_omp_target_shared_mem_space || \ MS == llvm_omp_target_device_mem_space) + #define KMP_IS_TARGET_MEM_ALLOC(MA) \ (MA == llvm_omp_target_host_mem_alloc || \ MA == llvm_omp_target_shared_mem_alloc || \ MA == llvm_omp_target_device_mem_alloc) +#define KMP_IS_PREDEF_MEM_SPACE(MS) \ + (MS == omp_null_mem_space || MS == omp_default_mem_space || \ + MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \ + MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \ + KMP_IS_TARGET_MEM_SPACE(MS)) + +/// Support OMP 6.0 target memory management +/// Expected offload runtime entries. +/// +/// Returns number of resources and list of unique resource IDs in "resources". +/// Runtime needs to invoke this twice to get the number of resources, allocate +/// space for the resource IDs, and finally let offload runtime write resource +/// IDs in "resources". +/// int __tgt_get_mem_resources(int num_devices, const int *devices, +/// int host_access, omp_memspace_handle_t memspace, +/// int *resources); +/// +/// Redirects omp_alloc call to offload runtime. +/// void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator); +/// +/// Redirects omp_free call to offload runtime.
+/// void __tgt_omp_free(void *ptr, omp_allocator_handle_t); +class kmp_tgt_allocator_t { + bool supported = false; + using get_mem_resources_t = int (*)(int, const int *, int, + omp_memspace_handle_t, int *); + using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t); + using omp_free_t = void (*)(void *, omp_allocator_handle_t); + get_mem_resources_t tgt_get_mem_resources = nullptr; + omp_alloc_t tgt_omp_alloc = nullptr; + omp_free_t tgt_omp_free = nullptr; + +public: + /// Initialize interface with offload runtime + void init() { + tgt_get_mem_resources = + (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources"); + tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc"); + tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free"); + supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free; + } + /// Obtain resource information from offload runtime. We assume offload + /// runtime backends maintain a list of unique resource IDs. + int get_mem_resources(int ndevs, const int *devs, int host, + omp_memspace_handle_t memspace, int *resources) { + if (supported) + return tgt_get_mem_resources(ndevs, devs, host, memspace, resources); + return 0; + } + /// Invoke offload runtime's memory allocation routine + void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { + if (supported) + return tgt_omp_alloc(size, allocator); + return nullptr; + } + /// Invoke offload runtime's memory deallocation routine + void omp_free(void *ptr, omp_allocator_handle_t allocator) { + if (supported) + tgt_omp_free(ptr, allocator); + } +} __kmp_tgt_allocator; + +extern "C" int omp_get_num_devices(void); + +/// Maintain a list of target memory spaces that are identified with the +/// requested information. There will be only one unique memory space object +/// that matches the input.
+class kmp_tgt_memspace_list_t { + kmp_memspace_t *memspace_list = nullptr; + KMP_LOCK_INIT(mtx); + /// Find memory space that matches the provided input + kmp_memspace_t *find(int num_resources, const int *resources, + omp_memspace_handle_t memspace) { + kmp_memspace_t *ms = memspace_list; + while (ms) { + if (ms->num_resources == num_resources && ms->memspace == memspace && + !memcmp(ms->resources, resources, sizeof(int) * num_resources)) + break; + ms = ms->next; + } + return ms; + } + /// Return memory space for the provided input. It tries to find existing + /// memory space that exactly matches the provided input or create one if + /// not found. + omp_memspace_handle_t get(int num_resources, const int *resources, + omp_memspace_handle_t memspace) { + int gtid = __kmp_entry_gtid(); + __kmp_acquire_lock(&mtx, gtid); + // Sort absolute IDs in the resource list + int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources); + KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int)); + qsort(sorted_resources, (size_t)num_resources, sizeof(int), + [](const void *a, const void *b) { + const int val_a = *(const int *)a; + const int val_b = *(const int *)b; + return (val_a > val_b) ? 1 : ((val_a < val_b) ?
-1 : 0); + }); + kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace); + if (ms) { + __kmp_free(sorted_resources); + __kmp_release_lock(&mtx, gtid); + return ms; + } + ms = (kmp_memspace_t *)__kmp_allocate(sizeof(kmp_memspace_t)); + ms->memspace = memspace; + ms->num_resources = num_resources; + ms->resources = sorted_resources; + ms->next = memspace_list; + memspace_list = ms; + __kmp_release_lock(&mtx, gtid); + return ms; + } + +public: + /// Initialize memory space list + void init() { __kmp_init_lock(&mtx); } + /// Release resources for the memory space list + void fini() { + kmp_memspace_t *ms = memspace_list; + while (ms) { + if (ms->resources) + __kmp_free(ms->resources); + kmp_memspace_t *tmp = ms; + ms = ms->next; + __kmp_free(tmp); + } + __kmp_destroy_lock(&mtx); + } + /// Return memory space for the provided input + omp_memspace_handle_t get_memspace(int num_devices, const int *devices, + int host_access, + omp_memspace_handle_t memspace) { + int actual_num_devices = num_devices; + int *actual_devices = const_cast<int *>(devices); + if (actual_num_devices == 0) { + actual_num_devices = omp_get_num_devices(); + if (actual_num_devices <= 0) + return omp_null_mem_space; + } + if (actual_devices == NULL) { + // Prepare list of all devices in this case.
+ actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices); + for (int i = 0; i < actual_num_devices; i++) + actual_devices[i] = i; + } + // Get the number of available resources first + int num_resources = __kmp_tgt_allocator.get_mem_resources( + actual_num_devices, actual_devices, host_access, memspace, NULL); + // No early return when no resources are found: fall through so a device + // list allocated above is still released before returning. + + omp_memspace_handle_t ms = omp_null_mem_space; + if (num_resources > 0) { + int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources); + // Let offload runtime write the resource IDs + num_resources = __kmp_tgt_allocator.get_mem_resources( + actual_num_devices, actual_devices, host_access, memspace, resources); + ms = get(num_resources, resources, memspace); + __kmp_free(resources); + } + if (!devices && actual_devices) + __kmp_free(actual_devices); + return ms; + } + /// Return sub memory space from the parent memory space + omp_memspace_handle_t get_memspace(int num_resources, const int *resources, + omp_memspace_handle_t parent) { + kmp_memspace_t *ms = (kmp_memspace_t *)parent; + return get(num_resources, resources, ms->memspace); + } +} __kmp_tgt_memspace_list; + #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN static inline void chk_kind(void ***pkind) { KMP_DEBUG_ASSERT(pkind); @@ -1456,19 +1631,30 @@ void __kmp_init_target_mem() { // lock/pin and unlock/unpin target calls *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem"); *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem"); + __kmp_tgt_allocator.init(); + __kmp_tgt_memspace_list.init(); } +/// Finalize target memory support +void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.fini(); } + omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, int ntraits, omp_alloctrait_t traits[]) { - // OpenMP 5.0 only allows predefined memspaces - KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
- ms == omp_large_cap_mem_space || ms == omp_const_mem_space || - ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms)); kmp_allocator_t *al; int i; al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed al->memspace = ms; // not used currently + + // Assign default values if applicable + al->alignment = 1; + al->pinned = false; + al->partition = omp_atv_environment; + al->pin_device = -1; + al->preferred_device = -1; + al->target_access = omp_atv_single; + al->atomic_scope = omp_atv_device; + for (i = 0; i < ntraits; ++i) { switch (traits[i].key) { case omp_atk_sync_hint: @@ -1503,10 +1689,33 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, #endif al->memkind = RCAST(void **, traits[i].value); break; + case omp_atk_pin_device: + __kmp_type_convert(traits[i].value, &(al->pin_device)); + break; + case omp_atk_preferred_device: + __kmp_type_convert(traits[i].value, &(al->preferred_device)); + break; + case omp_atk_target_access: + al->target_access = (omp_alloctrait_value_t)traits[i].value; + break; + case omp_atk_atomic_scope: + al->atomic_scope = (omp_alloctrait_value_t)traits[i].value; + break; + case omp_atk_part_size: + __kmp_type_convert(traits[i].value, &(al->part_size)); + break; default: KMP_ASSERT2(0, "Unexpected allocator trait"); } } + + if (al->memspace > kmp_max_mem_space) { + // Memory space has been allocated for targets. 
+ return (omp_allocator_handle_t)al; + } + + KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace)); + if (al->fb == 0) { // set default allocator al->fb = omp_atv_default_mem_fb; @@ -1580,6 +1789,71 @@ omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) { return __kmp_threads[gtid]->th.th_def_allocator; } +omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs, + omp_memspace_handle_t memspace, + int host) { + if (!__kmp_init_serial) + __kmp_serial_initialize(); + // Only accept valid device description and predefined memory space + if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space) + return omp_null_mem_space; + + return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace); +} + +omp_allocator_handle_t +__kmp_get_devices_allocator(int ndevs, const int *devs, + omp_memspace_handle_t memspace, int host) { + if (!__kmp_init_serial) + __kmp_serial_initialize(); + // Only accept valid device description and predefined memory space + if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space) + return omp_null_allocator; + + omp_memspace_handle_t mspace = + __kmp_get_devices_memspace(ndevs, devs, memspace, host); + if (mspace == omp_null_mem_space) + return omp_null_allocator; + + return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL); +} + +int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) { + if (!__kmp_init_serial) + __kmp_serial_initialize(); + if (memspace == omp_null_mem_space) + return 0; + if (memspace < kmp_max_mem_space) + return 1; // return 1 for predefined memory space + kmp_memspace_t *ms = (kmp_memspace_t *)memspace; + return ms->num_resources; +} + +omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace, + int num_resources, int *resources) { + if (!__kmp_init_serial) + __kmp_serial_initialize(); + if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space) + return memspace; // return input memory space for predefined
memory space + kmp_memspace_t *ms = (kmp_memspace_t *)memspace; + if (num_resources <= 0 || ms->num_resources < num_resources || !resources) + return omp_null_mem_space; // input memory space cannot satisfy the request + + // The stored resource ID is an absolute ID only known to the offload backend, + // and the returned memory space will still keep the property. + int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources); + + // Collect absolute resource ID from the relative ID (not range-checked) + for (int i = 0; i < num_resources; i++) + resources_abs[i] = ms->resources[resources[i]]; + + omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace( + num_resources, resources_abs, memspace); + __kmp_free(resources_abs); + + return submemspace; +} + typedef struct kmp_mem_desc { // Memory block descriptor void *ptr_alloc; // Pointer returned by allocator size_t size_a; // Size of allocated memory block (initial+descriptor+align) @@ -1667,6 +1941,11 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, int use_default_allocator = (!__kmp_hwloc_available && !__kmp_memkind_available); + if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) { + // Memspace has been allocated for targets. + return __kmp_tgt_allocator.omp_alloc(size, allocator); + } + if (KMP_IS_TARGET_MEM_ALLOC(allocator)) { // Use size input directly as the memory may not be accessible on host. // Use default device for now.
@@ -2021,6 +2300,12 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) { kmp_mem_desc_t desc; kmp_uintptr_t addr_align; // address to return to caller kmp_uintptr_t addr_descr; // address of memory block descriptor + + if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) { + __kmp_tgt_allocator.omp_free(ptr, allocator); + return; + } + if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) || (allocator > kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)))) { diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 9c8be5f953d3..59a9571d5953 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -428,6 +428,129 @@ omp_allocator_handle_t FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) { #endif } +/* OpenMP 6.0 (TR11) Memory Management support */ +omp_memspace_handle_t FTN_STDCALL +FTN_GET_DEVICES_MEMSPACE(int KMP_DEREF ndevs, const int *devs, + omp_memspace_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_devices_memspace(KMP_DEREF ndevs, devs, KMP_DEREF memspace, + 0 /* host */); +#endif +} + +omp_memspace_handle_t FTN_STDCALL FTN_GET_DEVICE_MEMSPACE( + int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + int dev_num = KMP_DEREF dev; + return __kmp_get_devices_memspace(1, &dev_num, KMP_DEREF memspace, 0); +#endif +} + +omp_memspace_handle_t FTN_STDCALL +FTN_GET_DEVICES_AND_HOST_MEMSPACE(int KMP_DEREF ndevs, const int *devs, + omp_memspace_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_devices_memspace(KMP_DEREF ndevs, devs, KMP_DEREF memspace, + 1); +#endif +} + +omp_memspace_handle_t FTN_STDCALL FTN_GET_DEVICE_AND_HOST_MEMSPACE( + int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + int dev_num = KMP_DEREF dev; + return __kmp_get_devices_memspace(1, &dev_num, 
KMP_DEREF memspace, 1); +#endif +} + +omp_memspace_handle_t FTN_STDCALL +FTN_GET_DEVICES_ALL_MEMSPACE(omp_memspace_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_devices_memspace(0, NULL, KMP_DEREF memspace, 1); +#endif +} + +omp_allocator_handle_t FTN_STDCALL +FTN_GET_DEVICES_ALLOCATOR(int KMP_DEREF ndevs, const int *devs, + omp_allocator_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_devices_allocator(KMP_DEREF ndevs, devs, KMP_DEREF memspace, + 0 /* host */); +#endif +} + +omp_allocator_handle_t FTN_STDCALL FTN_GET_DEVICE_ALLOCATOR( + int KMP_DEREF dev, omp_allocator_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + int dev_num = KMP_DEREF dev; + return __kmp_get_devices_allocator(1, &dev_num, KMP_DEREF memspace, 0); +#endif +} + +omp_allocator_handle_t FTN_STDCALL +FTN_GET_DEVICES_AND_HOST_ALLOCATOR(int KMP_DEREF ndevs, const int *devs, + omp_allocator_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_devices_allocator(KMP_DEREF ndevs, devs, KMP_DEREF memspace, + 1); +#endif +} + +omp_allocator_handle_t FTN_STDCALL FTN_GET_DEVICE_AND_HOST_ALLOCATOR( + int KMP_DEREF dev, omp_allocator_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + int dev_num = KMP_DEREF dev; + return __kmp_get_devices_allocator(1, &dev_num, KMP_DEREF memspace, 1); +#endif +} + +omp_allocator_handle_t FTN_STDCALL +FTN_GET_DEVICES_ALL_ALLOCATOR(omp_allocator_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_devices_allocator(0, NULL, KMP_DEREF memspace, 1); +#endif +} + +int FTN_STDCALL +FTN_GET_MEMSPACE_NUM_RESOURCES(omp_memspace_handle_t KMP_DEREF memspace) { +#ifdef KMP_STUB + return 0; +#else + return __kmp_get_memspace_num_resources(KMP_DEREF memspace); +#endif +} + +omp_memspace_handle_t FTN_STDCALL +FTN_GET_SUBMEMSPACE(omp_memspace_handle_t KMP_DEREF memspace, + int KMP_DEREF 
num_resources, int *resources) { +#ifdef KMP_STUB + return NULL; +#else + return __kmp_get_submemspace(KMP_DEREF memspace, KMP_DEREF num_resources, + resources); +#endif +} + /* OpenMP 5.0 affinity format support */ #ifndef KMP_STUB static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size, diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h index 7d595b947f4a..ae0ed067235e 100644 --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -127,6 +127,18 @@ #define FTN_DESTROY_ALLOCATOR omp_destroy_allocator #define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator +#define FTN_GET_DEVICES_MEMSPACE omp_get_devices_memspace +#define FTN_GET_DEVICE_MEMSPACE omp_get_device_memspace +#define FTN_GET_DEVICES_AND_HOST_MEMSPACE omp_get_devices_and_host_memspace +#define FTN_GET_DEVICE_AND_HOST_MEMSPACE omp_get_device_and_host_memspace +#define FTN_GET_DEVICES_ALL_MEMSPACE omp_get_devices_all_memspace +#define FTN_GET_DEVICES_ALLOCATOR omp_get_devices_allocator +#define FTN_GET_DEVICE_ALLOCATOR omp_get_device_allocator +#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR omp_get_devices_and_host_allocator +#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR omp_get_device_and_host_allocator +#define FTN_GET_DEVICES_ALL_ALLOCATOR omp_get_devices_all_allocator +#define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources +#define FTN_GET_SUBMEMSPACE omp_get_submemspace #define FTN_GET_DEVICE_NUM omp_get_device_num #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format @@ -262,6 +274,18 @@ #define FTN_DESTROY_ALLOCATOR omp_destroy_allocator_ #define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator_ #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_ +#define FTN_GET_DEVICES_MEMSPACE omp_get_devices_memspace_ +#define FTN_GET_DEVICE_MEMSPACE omp_get_device_memspace_ +#define 
FTN_GET_DEVICES_AND_HOST_MEMSPACE omp_get_devices_and_host_memspace_ +#define FTN_GET_DEVICE_AND_HOST_MEMSPACE omp_get_device_and_host_memspace_ +#define FTN_GET_DEVICES_ALL_MEMSPACE omp_get_devices_all_memspace_ +#define FTN_GET_DEVICES_ALLOCATOR omp_get_devices_allocator_ +#define FTN_GET_DEVICE_ALLOCATOR omp_get_device_allocator_ +#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR omp_get_devices_and_host_allocator_ +#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR omp_get_device_and_host_allocator_ +#define FTN_GET_DEVICES_ALL_ALLOCATOR omp_get_devices_all_allocator_ +#define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources_ +#define FTN_GET_SUBMEMSPACE omp_get_submemspace_ #define FTN_ALLOC omp_alloc_ #define FTN_FREE omp_free_ #define FTN_GET_DEVICE_NUM omp_get_device_num_ @@ -399,6 +423,18 @@ #define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR #define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR +#define FTN_GET_DEVICES_MEMSPACE OMP_GET_DEVICES_MEMSPACE +#define FTN_GET_DEVICE_MEMSPACE OMP_GET_DEVICE_MEMSPACE +#define FTN_GET_DEVICES_AND_HOST_MEMSPACE OMP_GET_DEVICES_AND_HOST_MEMSPACE +#define FTN_GET_DEVICE_AND_HOST_MEMSPACE OMP_GET_DEVICE_AND_HOST_MEMSPACE +#define FTN_GET_DEVICES_ALL_MEMSPACE OMP_GET_DEVICES_ALL_MEMSPACE +#define FTN_GET_DEVICES_ALLOCATOR OMP_GET_DEVICES_ALLOCATOR +#define FTN_GET_DEVICE_ALLOCATOR OMP_GET_DEVICE_ALLOCATOR +#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR OMP_GET_DEVICES_AND_HOST_ALLOCATOR +#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR OMP_GET_DEVICE_AND_HOST_ALLOCATOR +#define FTN_GET_DEVICES_ALL_ALLOCATOR OMP_GET_DEVICES_ALL_ALLOCATOR +#define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES +#define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT @@ -534,6 +570,18 @@ #define FTN_DESTROY_ALLOCATOR 
OMP_DESTROY_ALLOCATOR_ #define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR_ #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_ +#define FTN_GET_DEVICES_MEMSPACE OMP_GET_DEVICES_MEMSPACE_ +#define FTN_GET_DEVICE_MEMSPACE OMP_GET_DEVICE_MEMSPACE_ +#define FTN_GET_DEVICES_AND_HOST_MEMSPACE OMP_GET_DEVICES_AND_HOST_MEMSPACE_ +#define FTN_GET_DEVICE_AND_HOST_MEMSPACE OMP_GET_DEVICE_AND_HOST_MEMSPACE_ +#define FTN_GET_DEVICES_ALL_MEMSPACE OMP_GET_DEVICES_ALL_MEMSPACE_ +#define FTN_GET_DEVICES_ALLOCATOR OMP_GET_DEVICES_ALLOCATOR_ +#define FTN_GET_DEVICE_ALLOCATOR OMP_GET_DEVICE_ALLOCATOR_ +#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR OMP_GET_DEVICES_AND_HOST_ALLOCATOR_ +#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR OMP_GET_DEVICE_AND_HOST_ALLOCATOR_ +#define FTN_GET_DEVICES_ALL_ALLOCATOR OMP_GET_DEVICES_ALL_ALLOCATOR_ +#define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES_ +#define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE_ #define FTN_ALLOC OMP_ALLOC_ #define FTN_FREE OMP_FREE_ #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_ diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index eb077bca4ce2..c3bc235a44fa 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -324,8 +324,9 @@ omp_allocator_handle_t const kmp_max_mem_alloc = (omp_allocator_handle_t const)1024; omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc; +omp_memspace_handle_t const omp_null_mem_space = (omp_memspace_handle_t const)0; omp_memspace_handle_t const omp_default_mem_space = - (omp_memspace_handle_t const)0; + (omp_memspace_handle_t const)99; omp_memspace_handle_t const omp_large_cap_mem_space = (omp_memspace_handle_t const)1; omp_memspace_handle_t const omp_const_mem_space = @@ -340,6 +341,8 @@ omp_memspace_handle_t const llvm_omp_target_shared_mem_space = (omp_memspace_handle_t const)101; omp_memspace_handle_t const llvm_omp_target_device_mem_space = (omp_memspace_handle_t const)102; 
+omp_memspace_handle_t const kmp_max_mem_space = + (omp_memspace_handle_t const)1024; /* This check ensures that the compiler is passing the correct data type for the flags formal parameter of the function kmpc_omp_task_alloc(). If the type is diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 9f679aa8d334..417eceb8ebec 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -579,7 +579,10 @@ static void __kmp_init_allocator() { __kmp_init_memkind(); __kmp_init_target_mem(); } -static void __kmp_fini_allocator() { __kmp_fini_memkind(); } +static void __kmp_fini_allocator() { + __kmp_fini_target_mem(); + __kmp_fini_memkind(); +} /* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp index f25e24f09a03..06276d1bed1c 100644 --- a/openmp/runtime/src/kmp_stub.cpp +++ b/openmp/runtime/src/kmp_stub.cpp @@ -357,8 +357,9 @@ omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc = omp_allocator_handle_t const llvm_omp_target_device_mem_alloc = (omp_allocator_handle_t const)102; +omp_memspace_handle_t const omp_null_mem_space = (omp_memspace_handle_t const)0; omp_memspace_handle_t const omp_default_mem_space = - (omp_memspace_handle_t const)0; + (omp_memspace_handle_t const)99; omp_memspace_handle_t const omp_large_cap_mem_space = (omp_memspace_handle_t const)1; omp_memspace_handle_t const omp_const_mem_space = diff --git a/openmp/runtime/test/api/omp60_memory_routines.c b/openmp/runtime/test/api/omp60_memory_routines.c new file mode 100644 index 000000000000..97b648a7a01b --- /dev/null +++ b/openmp/runtime/test/api/omp60_memory_routines.c @@ -0,0 +1,228 @@ +// RUN: %libomp-compile -Wl,--export-dynamic && %libomp-run + +// REQUIRES: linux + +// Test OpenMP 6.0 memory management routines. +// Test host runtime's basic support with an emulated offload runtime. 
+
+#include <stdlib.h>
+#include <omp.h>
+
+#define NUM_DEVICES 4
+
+//
+// Required offload runtime interfaces
+//
+
+// Emulated device count: always NUM_DEVICES.
+extern int __tgt_get_num_devices(void) { return NUM_DEVICES; }
+
+// Emulated resource query. Reports one resource per requested device and,
+// when `resources` is non-NULL, fills it with the device IDs themselves.
+// `host` and `memspace` are ignored by this emulation.
+extern int __tgt_get_mem_resources(int num_devices, const int *devices,
+                                   int host, omp_memspace_handle_t memspace,
+                                   int *resources) {
+  int i;
+  // We expect valid inputs within this test.
+  int num_resources = num_devices;
+  if (resources) {
+    // Simple resource ID mapping example in the backend (=device ID).
+    // This does not represent any real backend.
+    for (i = 0; i < num_devices; i++)
+      resources[i] = devices[i];
+  }
+  return num_resources;
+}
+
+// Emulated target allocation: plain host malloc; `allocator` is ignored.
+extern void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator) {
+  return malloc(size);
+}
+
+// Emulated target deallocation: plain host free; `allocator` is ignored.
+extern void __tgt_omp_free(void *ptr, omp_allocator_handle_t allocator) {
+  free(ptr);
+}
+
+// Code above is also used by the corresponding Fortran test
+
+// Return EXIT_FAILURE from the enclosing function when Expr is false.
+#define CHECK_OR_RET_FAIL(Expr)                                                \
+  do {                                                                         \
+    if (!(Expr))                                                               \
+      return EXIT_FAILURE;                                                     \
+  } while (0)
+
+// Test user-initialized allocator with the given memory space:
+// init an allocator on `ms`, allocate and free 1024 bytes, destroy it.
+static int test_user_allocator(omp_memspace_handle_t ms) {
+  omp_allocator_handle_t al = omp_null_allocator;
+  al = omp_init_allocator(ms, 0, NULL);
+  CHECK_OR_RET_FAIL(al != omp_null_allocator);
+  void *m = omp_alloc(1024, al);
+  CHECK_OR_RET_FAIL(m != NULL);
+  omp_free(m, al);
+  omp_destroy_allocator(al);
+  return EXIT_SUCCESS;
+}
+
+// Allocate and free 1024 bytes with `al`, then destroy `al`.
+// The allocator is consumed: callers must not use it afterwards.
+static int test_allocator(omp_allocator_handle_t al) {
+  void *m = omp_alloc(1024, al);
+  CHECK_OR_RET_FAIL(m != NULL);
+  omp_free(m, al);
+  omp_destroy_allocator(al);
+  return EXIT_SUCCESS;
+}
+
+// Exercise the omp_get_device*_memspace routines for every device and every
+// contiguous device range, checking handle stability and allocator creation.
+static int test_mem_space(void) {
+  int i, count;
+  int num_devices = omp_get_num_devices();
+  CHECK_OR_RET_FAIL(num_devices == NUM_DEVICES);
+
+  int *all_devices = (int *)malloc(sizeof(int) * num_devices);
+  CHECK_OR_RET_FAIL(all_devices != NULL);
+  for (i = 0; i < num_devices; i++)
+    all_devices[i] = i;
+
+  omp_memspace_handle_t predef = omp_default_mem_space;
+  omp_memspace_handle_t ms1 = omp_null_mem_space;
+  omp_memspace_handle_t ms2 = omp_null_mem_space;
+
+  // Test the following API routines.
+  // * omp_get_device_memspace
+  // * omp_get_device_and_host_memspace
+  // * omp_get_devices_memspace
+  // * omp_get_devices_and_host_memspace
+  // Test if runtime returns the same memory space handle for the same input.
+  // Test if we can use the memory space to initialize allocator.
+  for (i = 0; i < num_devices; i++) {
+    ms1 = omp_get_device_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+    ms2 = omp_get_device_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 == ms2);
+    CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+    ms1 = ms2 = omp_null_mem_space;
+
+    ms1 = omp_get_device_and_host_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+    ms2 = omp_get_device_and_host_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 == ms2);
+    CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+    ms1 = ms2 = omp_null_mem_space;
+
+    // Every contiguous range of devices starting at device i.
+    for (count = 1; i + count <= num_devices; count++) {
+      int *devices = &all_devices[i];
+      ms1 = omp_get_devices_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+      ms2 = omp_get_devices_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 == ms2);
+      CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+      ms1 = ms2 = omp_null_mem_space;
+
+      ms1 = omp_get_devices_and_host_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+      ms2 = omp_get_devices_and_host_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 == ms2);
+      CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+      ms1 = ms2 = omp_null_mem_space;
+    }
+  }
+
+  // Test the following API routines.
+  // * omp_get_devices_all_memspace
+  // Test if runtime returns the same memory space handle for the same input.
+  ms1 = omp_get_devices_all_memspace(predef);
+  CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+  ms2 = omp_get_devices_all_memspace(predef);
+  CHECK_OR_RET_FAIL(ms1 == ms2);
+
+  free(all_devices);
+
+  return EXIT_SUCCESS;
+}
+
+// Exercise the omp_get_device*_allocator routines for every device and every
+// contiguous device range; each returned allocator is used once and destroyed.
+static int test_mem_allocator(void) {
+  int i, count;
+  int num_devices = omp_get_num_devices();
+  CHECK_OR_RET_FAIL(num_devices == NUM_DEVICES);
+
+  int *all_devices = (int *)malloc(sizeof(int) * num_devices);
+  CHECK_OR_RET_FAIL(all_devices != NULL);
+  for (i = 0; i < num_devices; i++)
+    all_devices[i] = i;
+
+  omp_memspace_handle_t predef = omp_default_mem_space;
+  omp_allocator_handle_t al = omp_null_allocator;
+
+  // Test the following API routines.
+  // * omp_get_device_allocator
+  // * omp_get_device_and_host_allocator
+  // * omp_get_devices_allocator
+  // * omp_get_devices_and_host_allocator
+  for (i = 0; i < num_devices; i++) {
+    al = omp_get_device_allocator(i, predef);
+    CHECK_OR_RET_FAIL(al != omp_null_allocator);
+    CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+    al = omp_null_allocator;
+
+    al = omp_get_device_and_host_allocator(i, predef);
+    CHECK_OR_RET_FAIL(al != omp_null_allocator);
+    CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+    al = omp_null_allocator;
+
+    // Every contiguous range of devices starting at device i.
+    for (count = 1; i + count <= num_devices; count++) {
+      int *devices = &all_devices[i];
+      al = omp_get_devices_allocator(count, devices, predef);
+      CHECK_OR_RET_FAIL(al != omp_null_allocator);
+      CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+      al = omp_null_allocator;
+
+      al = omp_get_devices_and_host_allocator(count, devices, predef);
+      CHECK_OR_RET_FAIL(al != omp_null_allocator);
+      CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+      al = omp_null_allocator;
+    }
+  }
+
+  // Test the following API routines.
+  // * omp_get_devices_all_allocator
+  al = omp_get_devices_all_allocator(predef);
+  CHECK_OR_RET_FAIL(al != omp_null_allocator);
+  CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+
+  free(all_devices);
+
+  return EXIT_SUCCESS;
+}
+
+// Just test what we can expect from the emulated backend.
+static int test_sub_mem_space(void) {
+  int i;
+  omp_memspace_handle_t ms = omp_null_mem_space;
+  ms = omp_get_devices_all_memspace(omp_default_mem_space);
+  CHECK_OR_RET_FAIL(ms != omp_null_mem_space);
+  int num_resources = omp_get_memspace_num_resources(ms);
+  CHECK_OR_RET_FAIL(num_resources == NUM_DEVICES);
+
+  // Check if single-resource sub memspace is correctly returned.
+  for (i = 0; i < num_resources; i++) {
+    omp_memspace_handle_t sub = omp_get_submemspace(ms, 1, &i);
+    CHECK_OR_RET_FAIL(sub != omp_null_mem_space);
+    CHECK_OR_RET_FAIL(sub != ms);
+    int num_sub_resources = omp_get_memspace_num_resources(sub);
+    CHECK_OR_RET_FAIL(num_sub_resources == 1);
+  }
+
+  // Check if all-resource sub memspace is correctly returned.
+  int *resources = (int *)malloc(sizeof(int) * num_resources);
+  CHECK_OR_RET_FAIL(resources != NULL);
+  for (i = 0; i < num_resources; i++)
+    resources[i] = i;
+  omp_memspace_handle_t sub = omp_get_submemspace(ms, num_resources, resources);
+  // The index array is only needed for the call above; release it before the
+  // checks so that a failing check cannot leak it.
+  free(resources);
+  CHECK_OR_RET_FAIL(sub != omp_null_mem_space);
+  CHECK_OR_RET_FAIL(sub == ms);
+
+  return EXIT_SUCCESS;
+}
+
+// Run the three test groups in order and stop at the first failure.
+int main(void) {
+  int rc = test_mem_space();
+  CHECK_OR_RET_FAIL(rc == EXIT_SUCCESS);
+
+  rc = test_mem_allocator();
+  CHECK_OR_RET_FAIL(rc == EXIT_SUCCESS);
+
+  rc = test_sub_mem_space();
+  CHECK_OR_RET_FAIL(rc == EXIT_SUCCESS);
+
+  return rc;
+}