[OpenMP] 6.0 (TR11) Memory Management Update (#97106)

TR11 introduced changes to support target memory management in a unified
way by defining a series of API routines and additional traits. Host
runtime is oblivious to how actual memory resources are mapped when
using the new API routines, so it can only support how the composed
memory space is maintained, and the offload backend must handle which
memory resources are actually used to allocate memory from the memory
space.

Here is a summary of the implementation.
* Implemented 12 API routines to get/manipulate memory spaces/allocators.
* Memory space composed with a list of devices has a state with resource
description, and runtime is responsible for maintaining the allocated
memory space objects.
* Defined interface with offload runtime to access memory resource list,
and to redirect calls to omp_alloc/omp_free since it requires
backend-specific information.
* Value of omp_default_mem_space changed from 0 to 99, and
omp_null_mem_space took the value 0 as defined in the language.
* New allocator traits were introduced, but how to use them is up to the
offload backend.
* Added basic tests for the new API routines.
This commit is contained in:
Hansang Bae 2025-04-02 17:16:30 -05:00 committed by GitHub
parent acc6bcdc50
commit 8100bd58a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 1006 additions and 20 deletions

View File

@ -532,6 +532,18 @@ kmp_set_disp_num_buffers 890
omp_get_device_num 896
omp_init_allocator 897
omp_destroy_allocator 898
omp_get_devices_memspace 810
omp_get_device_memspace 811
omp_get_devices_and_host_memspace 812
omp_get_device_and_host_memspace 813
omp_get_devices_all_memspace 814
omp_get_devices_allocator 815
omp_get_device_allocator 816
omp_get_devices_and_host_allocator 817
omp_get_device_and_host_allocator 818
omp_get_devices_all_allocator 819
omp_get_memspace_num_resources 820
omp_get_submemspace 821
%ifndef stub
__kmpc_set_default_allocator
__kmpc_get_default_allocator
@ -592,6 +604,7 @@ kmp_set_disp_num_buffers 890
llvm_omp_target_host_mem_space DATA
llvm_omp_target_shared_mem_space DATA
llvm_omp_target_device_mem_space DATA
omp_null_mem_space DATA
%ifndef stub
# Ordinals between 900 and 999 are reserved

View File

@ -339,7 +339,13 @@
omp_atk_fallback = 5,
omp_atk_fb_data = 6,
omp_atk_pinned = 7,
omp_atk_partition = 8
omp_atk_partition = 8,
omp_atk_pin_device = 9,
omp_atk_preferred_device = 10,
omp_atk_device_access = 11,
omp_atk_target_access = 12,
omp_atk_atomic_scope = 13,
omp_atk_part_size = 14
} omp_alloctrait_key_t;
typedef enum {
@ -350,7 +356,7 @@
omp_atv_serialized = 5,
omp_atv_sequential = omp_atv_serialized, // (deprecated)
omp_atv_private = 6,
omp_atv_all = 7,
omp_atv_device = 7,
omp_atv_thread = 8,
omp_atv_pteam = 9,
omp_atv_cgroup = 10,
@ -361,7 +367,11 @@
omp_atv_environment = 15,
omp_atv_nearest = 16,
omp_atv_blocked = 17,
omp_atv_interleaved = 18
omp_atv_interleaved = 18,
omp_atv_all = 19,
omp_atv_single = 20,
omp_atv_multiple = 21,
omp_atv_memspace = 22
} omp_alloctrait_value_t;
#define omp_atv_default ((omp_uintptr_t)-1)
@ -387,6 +397,7 @@
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
typedef omp_uintptr_t omp_memspace_handle_t;
extern __KMP_IMP omp_memspace_handle_t const omp_null_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space;
@ -422,7 +433,8 @@
typedef enum omp_memspace_handle_t
# endif
{
omp_default_mem_space = 0,
omp_null_mem_space = 0,
omp_default_mem_space = 99,
omp_large_cap_mem_space = 1,
omp_const_mem_space = 2,
omp_high_bw_mem_space = 3,
@ -463,6 +475,20 @@
extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a);
# endif
/* OpenMP TR11 routines to get memory spaces and allocators */
/* Routines returning a memory space handle. The "devices" variants take a
   device count plus a device list, the "device" variants take one device
   number, and the "all" variant takes only the memory space. */
extern omp_memspace_handle_t omp_get_devices_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace);
extern omp_memspace_handle_t omp_get_device_memspace(int dev, omp_memspace_handle_t memspace);
extern omp_memspace_handle_t omp_get_devices_and_host_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace);
extern omp_memspace_handle_t omp_get_device_and_host_memspace(int dev, omp_memspace_handle_t memspace);
extern omp_memspace_handle_t omp_get_devices_all_memspace(omp_memspace_handle_t memspace);
/* Routines returning an allocator handle; parameters mirror the memory space
   routines above. */
extern omp_allocator_handle_t omp_get_devices_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace);
extern omp_allocator_handle_t omp_get_device_allocator(int dev, omp_memspace_handle_t memspace);
extern omp_allocator_handle_t omp_get_devices_and_host_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace);
extern omp_allocator_handle_t omp_get_device_and_host_allocator(int dev, omp_memspace_handle_t memspace);
extern omp_allocator_handle_t omp_get_devices_all_allocator(omp_memspace_handle_t memspace);
/* Resource queries on a memory space: number of resources, and a memory space
   built from a caller-selected list of resources. */
extern int omp_get_memspace_num_resources(omp_memspace_handle_t memspace);
extern omp_memspace_handle_t omp_get_submemspace(omp_memspace_handle_t memspace, int num_resources, int *resources);
/* OpenMP 5.0 Affinity Format */
extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);
extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);

View File

@ -145,6 +145,12 @@
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_fb_data = 6
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_pinned = 7
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_partition = 8
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_pin_device = 9
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_preferred_device = 10
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_device_access = 11
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_target_access = 12
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_atomic_scope = 13
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_part_size = 14
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_default = -1
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_false = 0
@ -154,7 +160,7 @@
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_serialized = 5
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_sequential = omp_atv_serialized
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_private = 6
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_all = 7
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_device = 7
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_thread = 8
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_pteam = 9
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_cgroup = 10
@ -166,6 +172,10 @@
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_nearest = 16
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_blocked = 17
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_interleaved = 18
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_all = 19
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_single = 20
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_multiple = 21
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_memspace = 22
integer (kind=omp_allocator_handle_kind), parameter, public :: omp_null_allocator = 0
integer (kind=omp_allocator_handle_kind), parameter, public :: omp_default_mem_alloc = 1
@ -180,7 +190,8 @@
integer (kind=omp_allocator_handle_kind), parameter, public :: llvm_omp_target_shared_mem_alloc = 101
integer (kind=omp_allocator_handle_kind), parameter, public :: llvm_omp_target_device_mem_alloc = 102
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_default_mem_space = 0
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_null_mem_space = 0
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_default_mem_space = 99
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_large_cap_mem_space = 1
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_const_mem_space = 2
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_high_bw_mem_space = 3
@ -802,6 +813,97 @@
logical (kind=omp_logical_kind) omp_in_explicit_task
end function omp_in_explicit_task
! Memory space handle for a device list (ndevs, devs) and a memory space.
function omp_get_devices_memspace(ndevs, devs, memspace)
use omp_lib_kinds
integer(omp_memspace_handle_kind) :: omp_get_devices_memspace
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_memspace
! Memory space handle for a single device number and a memory space.
function omp_get_device_memspace(dev, memspace)
use omp_lib_kinds
integer(omp_memspace_handle_kind) :: omp_get_device_memspace
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_memspace
! "and_host" variant of omp_get_devices_memspace (same arguments).
function omp_get_devices_and_host_memspace(ndevs, devs, memspace)
use omp_lib_kinds
integer(omp_memspace_handle_kind) :: &
omp_get_devices_and_host_memspace
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_and_host_memspace
! "and_host" variant of omp_get_device_memspace (same arguments).
function omp_get_device_and_host_memspace(dev, memspace)
use omp_lib_kinds
integer(omp_memspace_handle_kind) :: &
omp_get_device_and_host_memspace
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_and_host_memspace
! Memory space handle that takes no device argument, only a memory space.
function omp_get_devices_all_memspace(memspace)
use omp_lib_kinds
integer(omp_memspace_handle_kind) :: omp_get_devices_all_memspace
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_all_memspace
! Allocator handle for a device list (ndevs, devs) and a memory space.
function omp_get_devices_allocator(ndevs, devs, memspace)
use omp_lib_kinds
integer(omp_allocator_handle_kind) :: omp_get_devices_allocator
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_allocator
! Allocator handle for a single device number and a memory space.
function omp_get_device_allocator(dev, memspace)
use omp_lib_kinds
integer(omp_allocator_handle_kind) :: omp_get_device_allocator
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_allocator
! "and_host" variant of omp_get_devices_allocator (same arguments).
function omp_get_devices_and_host_allocator(ndevs, devs, memspace)
use omp_lib_kinds
integer(omp_allocator_handle_kind) :: &
omp_get_devices_and_host_allocator
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_and_host_allocator
! "and_host" variant of omp_get_device_allocator (same arguments).
function omp_get_device_and_host_allocator(dev, memspace)
use omp_lib_kinds
integer(omp_allocator_handle_kind) :: &
omp_get_device_and_host_allocator
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_and_host_allocator
! Allocator handle that takes no device argument, only a memory space.
function omp_get_devices_all_allocator(memspace)
use omp_lib_kinds
integer(omp_allocator_handle_kind) :: &
omp_get_devices_all_allocator
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_all_allocator
! Default-kind integer result: number of resources of a memory space.
function omp_get_memspace_num_resources(memspace)
use omp_lib_kinds
integer omp_get_memspace_num_resources
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_memspace_num_resources
! Memory space handle built from num_resources entries listed in resources.
function omp_get_submemspace(memspace, num_resources, resources)
use omp_lib_kinds
integer(omp_memspace_handle_kind) omp_get_submemspace
integer(omp_memspace_handle_kind), intent(in) :: memspace
integer, intent(in) :: num_resources
integer, intent(in) :: resources(*)
end function omp_get_submemspace
! ***
! *** kmp_* entry points
! ***

View File

@ -151,6 +151,18 @@
parameter(omp_atk_pinned=7)
integer(kind=omp_alloctrait_key_kind)omp_atk_partition
parameter(omp_atk_partition=8)
integer(kind=omp_alloctrait_key_kind)omp_atk_pin_device
parameter(omp_atk_pin_device=9)
integer(kind=omp_alloctrait_key_kind)omp_atk_preferred_device
parameter(omp_atk_preferred_device=10)
integer(kind=omp_alloctrait_key_kind)omp_atk_device_access
parameter(omp_atk_device_access=11)
integer(kind=omp_alloctrait_key_kind)omp_atk_target_access
parameter(omp_atk_target_access=12)
integer(kind=omp_alloctrait_key_kind)omp_atk_atomic_scope
parameter(omp_atk_atomic_scope=13)
integer(kind=omp_alloctrait_key_kind)omp_atk_part_size
parameter(omp_atk_part_size=14)
integer(kind=omp_alloctrait_val_kind)omp_atv_default
parameter(omp_atv_default=-1)
@ -170,8 +182,8 @@
parameter(omp_atv_sequential=5)
integer(kind=omp_alloctrait_val_kind)omp_atv_private
parameter(omp_atv_private=6)
integer(kind=omp_alloctrait_val_kind)omp_atv_all
parameter(omp_atv_all=7)
integer(kind=omp_alloctrait_val_kind)omp_atv_device
parameter(omp_atv_device=7)
integer(kind=omp_alloctrait_val_kind)omp_atv_thread
parameter(omp_atv_thread=8)
integer(kind=omp_alloctrait_val_kind)omp_atv_pteam
@ -194,6 +206,14 @@
parameter(omp_atv_blocked=17)
integer(kind=omp_alloctrait_val_kind)omp_atv_interleaved
parameter(omp_atv_interleaved=18)
integer(kind=omp_alloctrait_val_kind)omp_atv_all
parameter(omp_atv_all=19)
integer(kind=omp_alloctrait_val_kind)omp_atv_single
parameter(omp_atv_single=20)
integer(kind=omp_alloctrait_val_kind)omp_atv_multiple
parameter(omp_atv_multiple=21)
integer(kind=omp_alloctrait_val_kind)omp_atv_memspace
parameter(omp_atv_memspace=22)
type omp_alloctrait
integer (kind=omp_alloctrait_key_kind) key
@ -225,8 +245,10 @@
integer(omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
parameter(llvm_omp_target_device_mem_alloc=102)
integer(kind=omp_memspace_handle_kind)omp_null_mem_space
parameter(omp_null_mem_space=0)
integer(kind=omp_memspace_handle_kind)omp_default_mem_space
parameter(omp_default_mem_space=0)
parameter(omp_default_mem_space=99)
integer(kind=omp_memspace_handle_kind)omp_large_cap_mem_space
parameter(omp_large_cap_mem_space=1)
integer(kind=omp_memspace_handle_kind)omp_const_mem_space
@ -863,6 +885,98 @@
logical (kind=omp_logical_kind) omp_in_explicit_task
end function omp_in_explicit_task
! Memory space handle for a device list (ndevs, devs) and a memory space.
function omp_get_devices_memspace(ndevs, devs, memspace)
import
integer(omp_memspace_handle_kind) :: omp_get_devices_memspace
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_memspace
! Memory space handle for a single device number and a memory space.
function omp_get_device_memspace(dev, memspace)
import
integer(omp_memspace_handle_kind) :: omp_get_device_memspace
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_memspace
! "and_host" variant of omp_get_devices_memspace (same arguments).
function omp_get_devices_and_host_memspace(ndevs,devs,memspace)
import
integer(omp_memspace_handle_kind) :: &
& omp_get_devices_and_host_memspace
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_and_host_memspace
! "and_host" variant of omp_get_device_memspace (same arguments).
function omp_get_device_and_host_memspace(dev, memspace)
import
integer(omp_memspace_handle_kind) :: &
& omp_get_device_and_host_memspace
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_and_host_memspace
! Memory space handle that takes no device argument, only a memory space.
function omp_get_devices_all_memspace(memspace)
import
integer(omp_memspace_handle_kind)::omp_get_devices_all_memspace
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_all_memspace
! Allocator handle for a device list (ndevs, devs) and a memory space.
function omp_get_devices_allocator(ndevs, devs, memspace)
import
integer(omp_allocator_handle_kind)::omp_get_devices_allocator
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_allocator
! Allocator handle for a single device number and a memory space.
function omp_get_device_allocator(dev, memspace)
import
integer(omp_allocator_handle_kind) :: omp_get_device_allocator
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_allocator
! "and_host" variant of omp_get_devices_allocator (same arguments).
function omp_get_devices_and_host_allocator(ndevs,devs,memspace)
import
integer(omp_allocator_handle_kind) :: &
& omp_get_devices_and_host_allocator
integer, intent(in) :: ndevs
integer, intent(in) :: devs(*)
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_and_host_allocator
! "and_host" variant of omp_get_device_allocator (same arguments).
function omp_get_device_and_host_allocator(dev, memspace)
import
integer(omp_allocator_handle_kind) :: &
& omp_get_device_and_host_allocator
integer, intent(in) :: dev
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_device_and_host_allocator
! Allocator handle that takes no device argument, only a memory space.
function omp_get_devices_all_allocator(memspace)
import
integer(omp_allocator_handle_kind) :: &
& omp_get_devices_all_allocator
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_devices_all_allocator
! Default-kind integer result: number of resources of a memory space.
function omp_get_memspace_num_resources(memspace)
import
integer omp_get_memspace_num_resources
integer(omp_memspace_handle_kind), intent(in) :: memspace
end function omp_get_memspace_num_resources
! Memory space handle built from num_resources entries listed in resources.
function omp_get_submemspace(memspace, num_resources, resources)
import
integer(omp_memspace_handle_kind) omp_get_submemspace
integer(omp_memspace_handle_kind), intent(in) :: memspace
integer, intent(in) :: num_resources
integer, intent(in) :: resources(*)
end function omp_get_submemspace
! ***
! *** kmp_* entry points
! ***

View File

@ -1046,7 +1046,13 @@ typedef enum {
omp_atk_fallback = 5,
omp_atk_fb_data = 6,
omp_atk_pinned = 7,
omp_atk_partition = 8
omp_atk_partition = 8,
omp_atk_pin_device = 9,
omp_atk_preferred_device = 10,
omp_atk_device_access = 11,
omp_atk_target_access = 12,
omp_atk_atomic_scope = 13,
omp_atk_part_size = 14
} omp_alloctrait_key_t;
typedef enum {
@ -1057,7 +1063,7 @@ typedef enum {
omp_atv_serialized = 5,
omp_atv_sequential = omp_atv_serialized, // (deprecated)
omp_atv_private = 6,
omp_atv_all = 7,
omp_atv_device = 7,
omp_atv_thread = 8,
omp_atv_pteam = 9,
omp_atv_cgroup = 10,
@ -1068,11 +1074,16 @@ typedef enum {
omp_atv_environment = 15,
omp_atv_nearest = 16,
omp_atv_blocked = 17,
omp_atv_interleaved = 18
omp_atv_interleaved = 18,
omp_atv_all = 19,
omp_atv_single = 20,
omp_atv_multiple = 21,
omp_atv_memspace = 22
} omp_alloctrait_value_t;
#define omp_atv_default ((omp_uintptr_t)-1)
typedef void *omp_memspace_handle_t;
extern omp_memspace_handle_t const omp_null_mem_space;
extern omp_memspace_handle_t const omp_default_mem_space;
extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
@ -1081,6 +1092,7 @@ extern omp_memspace_handle_t const omp_low_lat_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
extern omp_memspace_handle_t const kmp_max_mem_space;
typedef struct {
omp_alloctrait_key_t key;
@ -1109,8 +1121,15 @@ extern omp_allocator_handle_t __kmp_def_allocator;
extern int __kmp_memkind_available;
extern bool __kmp_hwloc_available;
typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
/// Memory space information is shared with offload runtime.
/// One object describes a memory space composed from a predefined memory
/// space plus a list of resource IDs; objects are chained through "next" so
/// the runtime can keep them in a singly linked list.
typedef struct kmp_memspace_t {
omp_memspace_handle_t memspace; // predefined input memory space
int num_resources = 0; // number of available resources (entries in "resources")
int *resources = nullptr; // available resources (array of resource IDs)
kmp_memspace_t *next = nullptr; // next memory space handle in the list
} kmp_memspace_t;
/// Memory allocator information is shared with offload runtime.
typedef struct kmp_allocator_t {
omp_memspace_handle_t memspace;
void **memkind; // pointer to memkind
@ -1120,6 +1139,12 @@ typedef struct kmp_allocator_t {
kmp_uint64 pool_size;
kmp_uint64 pool_used;
bool pinned;
omp_alloctrait_value_t partition;
int pin_device;
int preferred_device;
omp_alloctrait_value_t target_access;
omp_alloctrait_value_t atomic_scope;
size_t part_size;
#if KMP_USE_HWLOC
omp_alloctrait_value_t membind;
#endif
@ -1155,6 +1180,21 @@ extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
extern void __kmp_init_target_mem();
extern void __kmp_fini_target_mem();
// OpenMP 6.0 (TR11) Memory Management support
// Returns the memory space handle for the given devices and predefined memory
// space. "host" is passed through to the memory space lookup as the
// host-access flag. The unnamed handle parameter is the input memory space.
extern omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs,
const int *devs,
omp_memspace_handle_t,
int host);
// Returns an allocator handle created for the memory space that
// __kmp_get_devices_memspace yields for the same arguments.
extern omp_allocator_handle_t __kmp_get_devices_allocator(int ndevs,
const int *devs,
omp_memspace_handle_t,
int host);
// Returns the number of resources recorded for "memspace".
extern int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace);
// Returns a memory space made of "num_resources" resources selected from
// "memspace"; entries of "resources" are used as indices into the parent's
// resource list.
extern omp_memspace_handle_t
__kmp_get_submemspace(omp_memspace_handle_t memspace, int num_resources,
int *resources);
/* ------------------------------------------------------------------------ */

View File

@ -1265,15 +1265,190 @@ static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
static bool __kmp_target_mem_available;
/// Handle classification helpers.
/// Every macro parameter is parenthesized so that an arbitrary expression
/// (for example a conditional expression) can be passed without changing the
/// meaning of the comparison. The argument may still be evaluated more than
/// once, so it must not have side effects.

/// True when MS is one of the LLVM target (host/shared/device) memory spaces.
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  ((MS) == llvm_omp_target_host_mem_space ||                                   \
   (MS) == llvm_omp_target_shared_mem_space ||                                 \
   (MS) == llvm_omp_target_device_mem_space)
/// True when MA is one of the LLVM target (host/shared/device) allocators.
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  ((MA) == llvm_omp_target_host_mem_alloc ||                                   \
   (MA) == llvm_omp_target_shared_mem_alloc ||                                 \
   (MA) == llvm_omp_target_device_mem_alloc)
/// True when MS is any predefined memory space, including the null memory
/// space and the LLVM target memory spaces.
#define KMP_IS_PREDEF_MEM_SPACE(MS)                                            \
  ((MS) == omp_null_mem_space || (MS) == omp_default_mem_space ||              \
   (MS) == omp_large_cap_mem_space || (MS) == omp_const_mem_space ||           \
   (MS) == omp_high_bw_mem_space || (MS) == omp_low_lat_mem_space ||           \
   KMP_IS_TARGET_MEM_SPACE(MS))
/// Support OMP 6.0 target memory management
/// Expected offload runtime entries.
///
/// Returns number of resources and list of unique resource IDs in "resources".
/// Runtime needs to invoke this twice to get the number of resources, allocate
/// space for the resource IDs, and finally let offload runtime write resource
/// IDs in "resources".
/// int __tgt_get_mem_resources(int num_devices, const int *devices,
/// int host_access, omp_memspace_handle_t memspace,
/// int *resources);
///
/// Redirects omp_alloc call to offload runtime.
/// void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator);
///
/// Redirects omp_free call to offload runtime.
/// void __tgt_omp_free(void *ptr, omp_allocator_handle_t);
class kmp_tgt_allocator_t {
  // Set by init(): true only when every offload entry point was resolved.
  bool supported = false;
  // Function-pointer types of the expected offload runtime entries.
  typedef int (*get_mem_resources_t)(int, const int *, int,
                                     omp_memspace_handle_t, int *);
  typedef void *(*omp_alloc_t)(size_t, omp_allocator_handle_t);
  typedef void (*omp_free_t)(void *, omp_allocator_handle_t);
  get_mem_resources_t tgt_get_mem_resources = nullptr;
  omp_alloc_t tgt_omp_alloc = nullptr;
  omp_free_t tgt_omp_free = nullptr;

public:
  /// Look up the offload runtime entries; the interface is usable only if
  /// all of them are found.
  void init() {
    tgt_get_mem_resources =
        (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
    tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
    tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
    supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
  }

  /// Query the offload runtime for resource information. Offload runtime
  /// backends are assumed to maintain a list of unique resource IDs.
  /// Yields 0 when the interface is not available.
  int get_mem_resources(int ndevs, const int *devs, int host,
                        omp_memspace_handle_t memspace, int *resources) {
    return supported
               ? tgt_get_mem_resources(ndevs, devs, host, memspace, resources)
               : 0;
  }

  /// Forward an allocation request to the offload runtime.
  /// Yields nullptr when the interface is not available.
  void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
    if (!supported)
      return nullptr;
    return tgt_omp_alloc(size, allocator);
  }

  /// Forward a deallocation request to the offload runtime.
  /// Does nothing when the interface is not available.
  void omp_free(void *ptr, omp_allocator_handle_t allocator) {
    if (!supported)
      return;
    tgt_omp_free(ptr, allocator);
  }
} __kmp_tgt_allocator;
extern "C" int omp_get_num_devices(void);
/// Maintain a list of target memory spaces that are identified with the
/// requested information (predefined memory space plus a sorted list of
/// resource IDs). There will be only one unique memory space object that
/// matches the input.
class kmp_tgt_memspace_list_t {
  kmp_memspace_t *memspace_list = nullptr; // head of the singly linked list
  KMP_LOCK_INIT(mtx); // taken by get() around lookup and insertion

  /// Find memory space that matches the provided input.
  /// "resources" is compared byte-wise against the stored lists, so it must
  /// be sorted the same way get() sorts before storing.
  /// Returns nullptr when no entry matches.
  kmp_memspace_t *find(int num_resources, const int *resources,
                       omp_memspace_handle_t memspace) {
    kmp_memspace_t *ms = memspace_list;
    while (ms) {
      if (ms->num_resources == num_resources && ms->memspace == memspace &&
          !memcmp(ms->resources, resources, sizeof(int) * num_resources))
        break;
      ms = ms->next;
    }
    return ms;
  }

  /// Return memory space for the provided input. It tries to find existing
  /// memory space that exactly matches the provided input or create one if
  /// not found. The input list is copied, so the caller keeps ownership of
  /// "resources".
  omp_memspace_handle_t get(int num_resources, const int *resources,
                            omp_memspace_handle_t memspace) {
    int gtid = __kmp_entry_gtid();
    __kmp_acquire_lock(&mtx, gtid);
    // Sort absolute IDs in the resource list so that equal sets compare equal
    int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
    KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
    qsort(sorted_resources, (size_t)num_resources, sizeof(int),
          [](const void *a, const void *b) {
            const int val_a = *(const int *)a;
            const int val_b = *(const int *)b;
            return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
          });
    kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
    if (ms) {
      // Existing entry is reused; the sorted copy is no longer needed.
      __kmp_free(sorted_resources);
    } else {
      // New entry takes ownership of the sorted copy and becomes list head.
      ms = (kmp_memspace_t *)__kmp_allocate(sizeof(kmp_memspace_t));
      ms->memspace = memspace;
      ms->num_resources = num_resources;
      ms->resources = sorted_resources;
      ms->next = memspace_list;
      memspace_list = ms;
    }
    __kmp_release_lock(&mtx, gtid);
    return ms;
  }

public:
  /// Initialize memory space list
  void init() { __kmp_init_lock(&mtx); }

  /// Release resources for the memory space list
  void fini() {
    kmp_memspace_t *ms = memspace_list;
    while (ms) {
      if (ms->resources)
        __kmp_free(ms->resources);
      kmp_memspace_t *tmp = ms;
      ms = ms->next;
      __kmp_free(tmp);
    }
    // Do not leave a dangling head behind once every entry has been freed.
    memspace_list = nullptr;
    __kmp_destroy_lock(&mtx);
  }

  /// Return memory space for the provided input.
  /// \param num_devices number of entries in "devices"; 0 requests all
  ///        devices reported by omp_get_num_devices()
  /// \param devices device numbers; ignored when num_devices is 0, and
  ///        replaced with 0..num_devices-1 when NULL
  /// \param host_access forwarded to the offload runtime
  /// \param memspace predefined input memory space
  /// \return the matching memory space, or omp_null_mem_space when the
  ///         request is invalid or no resource is available
  omp_memspace_handle_t get_memspace(int num_devices, const int *devices,
                                     int host_access,
                                     omp_memspace_handle_t memspace) {
    if (num_devices < 0)
      return omp_null_mem_space;
    int actual_num_devices = num_devices;
    if (actual_num_devices == 0) {
      actual_num_devices = omp_get_num_devices();
      if (actual_num_devices <= 0)
        return omp_null_mem_space;
    }
    const int *actual_devices = devices;
    int *all_devices = nullptr; // owned here; released before returning
    if (num_devices == 0 || devices == NULL) {
      // Prepare list of all devices in this case. A caller-provided list is
      // not used when num_devices is 0 because its length is unknown.
      all_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
      for (int i = 0; i < actual_num_devices; i++)
        all_devices[i] = i;
      actual_devices = all_devices;
    }
    omp_memspace_handle_t ms = omp_null_mem_space;
    // Get the number of available resources first
    int num_resources = __kmp_tgt_allocator.get_mem_resources(
        actual_num_devices, actual_devices, host_access, memspace, NULL);
    if (num_resources > 0) {
      int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
      // Let offload runtime write the resource IDs
      num_resources = __kmp_tgt_allocator.get_mem_resources(
          actual_num_devices, actual_devices, host_access, memspace, resources);
      // Only register the memory space if the second query still reports
      // resources.
      if (num_resources > 0)
        ms = get(num_resources, resources, memspace);
      __kmp_free(resources);
    }
    // Single exit point so the temporary device list is released on every
    // path, including "no available resources".
    if (all_devices)
      __kmp_free(all_devices);
    return ms;
  }

  /// Return sub memory space from the parent memory space.
  /// "parent" must be a handle to a kmp_memspace_t object; its predefined
  /// memory space is inherited by the returned memory space.
  omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
                                     omp_memspace_handle_t parent) {
    kmp_memspace_t *ms = (kmp_memspace_t *)parent;
    return get(num_resources, resources, ms->memspace);
  }
} __kmp_tgt_memspace_list;
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
static inline void chk_kind(void ***pkind) {
KMP_DEBUG_ASSERT(pkind);
@ -1456,19 +1631,30 @@ void __kmp_init_target_mem() {
// lock/pin and unlock/unpin target calls
*(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
*(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
__kmp_tgt_allocator.init();
__kmp_tgt_memspace_list.init();
}
/// Tear down target memory support: releases the target memory space list.
void __kmp_fini_target_mem() {
  __kmp_tgt_memspace_list.fini();
}
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
int ntraits,
omp_alloctrait_t traits[]) {
// OpenMP 5.0 only allows predefined memspaces
KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
kmp_allocator_t *al;
int i;
al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
al->memspace = ms; // not used currently
// Assign default values if applicable
al->alignment = 1;
al->pinned = false;
al->partition = omp_atv_environment;
al->pin_device = -1;
al->preferred_device = -1;
al->target_access = omp_atv_single;
al->atomic_scope = omp_atv_device;
for (i = 0; i < ntraits; ++i) {
switch (traits[i].key) {
case omp_atk_sync_hint:
@ -1503,10 +1689,33 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
#endif
al->memkind = RCAST(void **, traits[i].value);
break;
case omp_atk_pin_device:
__kmp_type_convert(traits[i].value, &(al->pin_device));
break;
case omp_atk_preferred_device:
__kmp_type_convert(traits[i].value, &(al->preferred_device));
break;
case omp_atk_target_access:
al->target_access = (omp_alloctrait_value_t)traits[i].value;
break;
case omp_atk_atomic_scope:
al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
break;
case omp_atk_part_size:
__kmp_type_convert(traits[i].value, &(al->part_size));
break;
default:
KMP_ASSERT2(0, "Unexpected allocator trait");
}
}
if (al->memspace > kmp_max_mem_space) {
// Memory space has been allocated for targets.
return (omp_allocator_handle_t)al;
}
KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
if (al->fb == 0) {
// set default allocator
al->fb = omp_atv_default_mem_fb;
@ -1580,6 +1789,71 @@ omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
return __kmp_threads[gtid]->th.th_def_allocator;
}
/// Return the memory space for the given devices and predefined memory space.
/// Performs serial initialization on first use. Yields omp_null_mem_space
/// for an invalid device description or a handle above kmp_max_mem_space.
omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs,
                                                 omp_memspace_handle_t memspace,
                                                 int host) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  // Only a valid device description and a predefined memory space are
  // accepted.
  const bool negative_count = ndevs < 0;
  const bool list_missing = ndevs > 0 && !devs;
  const bool not_predefined = memspace > kmp_max_mem_space;
  if (negative_count || list_missing || not_predefined)
    return omp_null_mem_space;

  return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
}
/// Return an allocator for the given devices and predefined memory space.
/// The allocator is created with no traits on top of the memory space that
/// __kmp_get_devices_memspace returns; omp_null_allocator is returned when
/// the input is rejected or no memory space is available.
omp_allocator_handle_t
__kmp_get_devices_allocator(int ndevs, const int *devs,
                            omp_memspace_handle_t memspace, int host) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  // Only a valid device description and a predefined memory space are
  // accepted.
  const bool negative_count = ndevs < 0;
  const bool list_missing = ndevs > 0 && !devs;
  const bool not_predefined = memspace > kmp_max_mem_space;
  if (negative_count || list_missing || not_predefined)
    return omp_null_allocator;

  omp_memspace_handle_t target_space =
      __kmp_get_devices_memspace(ndevs, devs, memspace, host);
  if (target_space != omp_null_mem_space)
    return __kmpc_init_allocator(__kmp_entry_gtid(), target_space, 0, NULL);
  return omp_null_allocator;
}
/// Return the number of resources in "memspace".
/// \return 0 for omp_null_mem_space, 1 for any other handle that is not above
///         kmp_max_mem_space (predefined memory space), otherwise the count
///         stored in the kmp_memspace_t object the handle points to.
int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();
  if (memspace == omp_null_mem_space)
    return 0;
  // Only handles strictly above kmp_max_mem_space are treated as allocated
  // kmp_memspace_t objects elsewhere in the runtime, so the boundary value
  // itself must not be dereferenced here either.
  if (memspace <= kmp_max_mem_space)
    return 1; // return 1 for predefined memory space
  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
  return ms->num_resources;
}
/// Return a memory space composed of a subset of the resources of "memspace".
/// \param memspace parent memory space
/// \param num_resources number of entries in "resources"
/// \param resources relative resource IDs, i.e. indices into the parent's
///        resource list
/// \return "memspace" itself when it is the null or a predefined memory
///         space; omp_null_mem_space when the request cannot be satisfied
///         (non-positive or too large count, missing list, or an index
///         outside the parent's resource list); otherwise the sub memory
///         space.
omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
                                            int num_resources, int *resources) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();
  // Only handles strictly above kmp_max_mem_space point to kmp_memspace_t
  // objects; everything else is returned unchanged.
  if (memspace == omp_null_mem_space || memspace <= kmp_max_mem_space)
    return memspace; // return input memory space for predefined memory space
  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
  if (num_resources <= 0 || ms->num_resources < num_resources || !resources)
    return omp_null_mem_space; // input memory space cannot satisfy the request
  // Every relative ID is used as an index into the parent's list below, so
  // reject anything outside [0, ms->num_resources) before touching memory.
  for (int i = 0; i < num_resources; i++) {
    if (resources[i] < 0 || resources[i] >= ms->num_resources)
      return omp_null_mem_space;
  }
  // The stored resource ID is an absolute ID only known to the offload backend,
  // and the returned memory space will still keep the property.
  int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
  // Collect absolute resource ID from the relative ID
  for (int i = 0; i < num_resources; i++)
    resources_abs[i] = ms->resources[resources[i]];
  omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
      num_resources, resources_abs, memspace);
  __kmp_free(resources_abs);
  return submemspace;
}
typedef struct kmp_mem_desc { // Memory block descriptor
void *ptr_alloc; // Pointer returned by allocator
size_t size_a; // Size of allocated memory block (initial+descriptor+align)
@ -1667,6 +1941,11 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
int use_default_allocator =
(!__kmp_hwloc_available && !__kmp_memkind_available);
if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
// Memspace has been allocated for targets.
return __kmp_tgt_allocator.omp_alloc(size, allocator);
}
if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
// Use size input directly as the memory may not be accessible on host.
// Use default device for now.
@ -2021,6 +2300,12 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
kmp_mem_desc_t desc;
kmp_uintptr_t addr_align; // address to return to caller
kmp_uintptr_t addr_descr; // address of memory block descriptor
if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
__kmp_tgt_allocator.omp_free(ptr, allocator);
return;
}
if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
(allocator > kmp_max_mem_alloc &&
KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {

View File

@ -428,6 +428,129 @@ omp_allocator_handle_t FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) {
#endif
}
/* OpenMP 6.0 (TR11) Memory Management support */
// Entry point behind FTN_GET_DEVICES_MEMSPACE: memory space for the listed
// devices, requested with the host flag cleared. The stub build returns NULL.
omp_memspace_handle_t FTN_STDCALL
FTN_GET_DEVICES_MEMSPACE(int KMP_DEREF ndevs, const int *devs,
                         omp_memspace_handle_t KMP_DEREF memspace) {
#ifndef KMP_STUB
  const int with_host = 0;
  return __kmp_get_devices_memspace(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
                                    with_host);
#else
  return NULL;
#endif
}
// Entry point behind FTN_GET_DEVICE_MEMSPACE: memory space for one device,
// requested with the host flag cleared. The stub build returns NULL.
omp_memspace_handle_t FTN_STDCALL FTN_GET_DEVICE_MEMSPACE(
    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
#ifndef KMP_STUB
  // Local copy so its address can be passed as a one-element device list.
  int single_dev = KMP_DEREF dev;
  const int *dev_list = &single_dev;
  return __kmp_get_devices_memspace(1, dev_list, KMP_DEREF memspace, 0);
#else
  return NULL;
#endif
}
// Entry point behind FTN_GET_DEVICES_AND_HOST_MEMSPACE: memory space for the
// listed devices, requested with the host flag set. The stub build returns
// NULL.
omp_memspace_handle_t FTN_STDCALL
FTN_GET_DEVICES_AND_HOST_MEMSPACE(int KMP_DEREF ndevs, const int *devs,
                                  omp_memspace_handle_t KMP_DEREF memspace) {
#ifndef KMP_STUB
  const int with_host = 1;
  return __kmp_get_devices_memspace(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
                                    with_host);
#else
  return NULL;
#endif
}
// Entry point behind FTN_GET_DEVICE_AND_HOST_MEMSPACE: memory space for one
// device, requested with the host flag set. The stub build returns NULL.
omp_memspace_handle_t FTN_STDCALL FTN_GET_DEVICE_AND_HOST_MEMSPACE(
    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
#ifndef KMP_STUB
  // Local copy so its address can be passed as a one-element device list.
  int single_dev = KMP_DEREF dev;
  const int *dev_list = &single_dev;
  return __kmp_get_devices_memspace(1, dev_list, KMP_DEREF memspace, 1);
#else
  return NULL;
#endif
}
// Entry point behind FTN_GET_DEVICES_ALL_MEMSPACE: forwards an empty device
// list (count 0, NULL) with the host flag set. The stub build returns NULL.
omp_memspace_handle_t FTN_STDCALL
FTN_GET_DEVICES_ALL_MEMSPACE(omp_memspace_handle_t KMP_DEREF memspace) {
#ifndef KMP_STUB
  const int *no_device_list = NULL;
  return __kmp_get_devices_memspace(0, no_device_list, KMP_DEREF memspace, 1);
#else
  return NULL;
#endif
}
// Entry point behind FTN_GET_DEVICES_ALLOCATOR: allocator over the listed
// devices, requested with the host flag cleared. The stub build returns NULL.
// The last argument is a memory space handle, so it is declared as
// omp_memspace_handle_t, matching the __kmp_get_devices_allocator parameter it
// is forwarded to.
omp_allocator_handle_t FTN_STDCALL
FTN_GET_DEVICES_ALLOCATOR(int KMP_DEREF ndevs, const int *devs,
                          omp_memspace_handle_t KMP_DEREF memspace) {
#ifdef KMP_STUB
  return NULL;
#else
  return __kmp_get_devices_allocator(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
                                     0 /* host */);
#endif
}
// Entry point behind FTN_GET_DEVICE_ALLOCATOR: allocator over one device,
// requested with the host flag cleared. The stub build returns NULL.
// The last argument is a memory space handle, so it is declared as
// omp_memspace_handle_t, matching the __kmp_get_devices_allocator parameter it
// is forwarded to.
omp_allocator_handle_t FTN_STDCALL FTN_GET_DEVICE_ALLOCATOR(
    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
#ifdef KMP_STUB
  return NULL;
#else
  // Local copy so its address can be passed as a one-element device list.
  int dev_num = KMP_DEREF dev;
  return __kmp_get_devices_allocator(1, &dev_num, KMP_DEREF memspace, 0);
#endif
}
// Entry point behind FTN_GET_DEVICES_AND_HOST_ALLOCATOR: allocator over the
// listed devices, requested with the host flag set. The stub build returns
// NULL.
// The last argument is a memory space handle, so it is declared as
// omp_memspace_handle_t, matching the __kmp_get_devices_allocator parameter it
// is forwarded to.
omp_allocator_handle_t FTN_STDCALL
FTN_GET_DEVICES_AND_HOST_ALLOCATOR(int KMP_DEREF ndevs, const int *devs,
                                   omp_memspace_handle_t KMP_DEREF memspace) {
#ifdef KMP_STUB
  return NULL;
#else
  return __kmp_get_devices_allocator(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
                                     1 /* host */);
#endif
}
// Entry point behind FTN_GET_DEVICE_AND_HOST_ALLOCATOR: allocator over one
// device, requested with the host flag set. The stub build returns NULL.
// The last argument is a memory space handle, so it is declared as
// omp_memspace_handle_t, matching the __kmp_get_devices_allocator parameter it
// is forwarded to.
omp_allocator_handle_t FTN_STDCALL FTN_GET_DEVICE_AND_HOST_ALLOCATOR(
    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
#ifdef KMP_STUB
  return NULL;
#else
  // Local copy so its address can be passed as a one-element device list.
  int dev_num = KMP_DEREF dev;
  return __kmp_get_devices_allocator(1, &dev_num, KMP_DEREF memspace, 1);
#endif
}
// Entry point behind FTN_GET_DEVICES_ALL_ALLOCATOR: forwards an empty device
// list (count 0, NULL) with the host flag set. The stub build returns NULL.
// The argument is a memory space handle, so it is declared as
// omp_memspace_handle_t, matching the __kmp_get_devices_allocator parameter it
// is forwarded to.
omp_allocator_handle_t FTN_STDCALL
FTN_GET_DEVICES_ALL_ALLOCATOR(omp_memspace_handle_t KMP_DEREF memspace) {
#ifdef KMP_STUB
  return NULL;
#else
  return __kmp_get_devices_allocator(0, NULL, KMP_DEREF memspace, 1);
#endif
}
// Entry point behind FTN_GET_MEMSPACE_NUM_RESOURCES: number of resources in
// the given memory space. The stub build always reports 0.
int FTN_STDCALL
FTN_GET_MEMSPACE_NUM_RESOURCES(omp_memspace_handle_t KMP_DEREF memspace) {
#ifndef KMP_STUB
  int resource_count = __kmp_get_memspace_num_resources(KMP_DEREF memspace);
  return resource_count;
#else
  return 0;
#endif
}
// Entry point behind FTN_GET_SUBMEMSPACE: sub memory space made of the given
// resources of `memspace`. The stub build returns NULL.
omp_memspace_handle_t FTN_STDCALL
FTN_GET_SUBMEMSPACE(omp_memspace_handle_t KMP_DEREF memspace,
                    int KMP_DEREF num_resources, int *resources) {
#ifndef KMP_STUB
  omp_memspace_handle_t sub_space = __kmp_get_submemspace(
      KMP_DEREF memspace, KMP_DEREF num_resources, resources);
  return sub_space;
#else
  return NULL;
#endif
}
/* OpenMP 5.0 affinity format support */
#ifndef KMP_STUB
static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size,

View File

@ -127,6 +127,18 @@
#define FTN_DESTROY_ALLOCATOR omp_destroy_allocator
#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator
#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator
/* OpenMP 6.0 (TR11) memory management routines: lower-case symbol names
   without a trailing underscore. */
#define FTN_GET_DEVICES_MEMSPACE omp_get_devices_memspace
#define FTN_GET_DEVICE_MEMSPACE omp_get_device_memspace
#define FTN_GET_DEVICES_AND_HOST_MEMSPACE omp_get_devices_and_host_memspace
#define FTN_GET_DEVICE_AND_HOST_MEMSPACE omp_get_device_and_host_memspace
#define FTN_GET_DEVICES_ALL_MEMSPACE omp_get_devices_all_memspace
#define FTN_GET_DEVICES_ALLOCATOR omp_get_devices_allocator
#define FTN_GET_DEVICE_ALLOCATOR omp_get_device_allocator
#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR omp_get_devices_and_host_allocator
#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR omp_get_device_and_host_allocator
#define FTN_GET_DEVICES_ALL_ALLOCATOR omp_get_devices_all_allocator
#define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources
#define FTN_GET_SUBMEMSPACE omp_get_submemspace
#define FTN_GET_DEVICE_NUM omp_get_device_num
#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format
#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format
@ -262,6 +274,18 @@
#define FTN_DESTROY_ALLOCATOR omp_destroy_allocator_
#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator_
#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_
/* OpenMP 6.0 (TR11) memory management routines: lower-case symbol names with
   a trailing underscore. */
#define FTN_GET_DEVICES_MEMSPACE omp_get_devices_memspace_
#define FTN_GET_DEVICE_MEMSPACE omp_get_device_memspace_
#define FTN_GET_DEVICES_AND_HOST_MEMSPACE omp_get_devices_and_host_memspace_
#define FTN_GET_DEVICE_AND_HOST_MEMSPACE omp_get_device_and_host_memspace_
#define FTN_GET_DEVICES_ALL_MEMSPACE omp_get_devices_all_memspace_
#define FTN_GET_DEVICES_ALLOCATOR omp_get_devices_allocator_
#define FTN_GET_DEVICE_ALLOCATOR omp_get_device_allocator_
#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR omp_get_devices_and_host_allocator_
#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR omp_get_device_and_host_allocator_
#define FTN_GET_DEVICES_ALL_ALLOCATOR omp_get_devices_all_allocator_
#define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources_
#define FTN_GET_SUBMEMSPACE omp_get_submemspace_
#define FTN_ALLOC omp_alloc_
#define FTN_FREE omp_free_
#define FTN_GET_DEVICE_NUM omp_get_device_num_
@ -399,6 +423,18 @@
#define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR
#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR
#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR
/* OpenMP 6.0 (TR11) memory management routines: upper-case symbol names
   without a trailing underscore. */
#define FTN_GET_DEVICES_MEMSPACE OMP_GET_DEVICES_MEMSPACE
#define FTN_GET_DEVICE_MEMSPACE OMP_GET_DEVICE_MEMSPACE
#define FTN_GET_DEVICES_AND_HOST_MEMSPACE OMP_GET_DEVICES_AND_HOST_MEMSPACE
#define FTN_GET_DEVICE_AND_HOST_MEMSPACE OMP_GET_DEVICE_AND_HOST_MEMSPACE
#define FTN_GET_DEVICES_ALL_MEMSPACE OMP_GET_DEVICES_ALL_MEMSPACE
#define FTN_GET_DEVICES_ALLOCATOR OMP_GET_DEVICES_ALLOCATOR
#define FTN_GET_DEVICE_ALLOCATOR OMP_GET_DEVICE_ALLOCATOR
#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR OMP_GET_DEVICES_AND_HOST_ALLOCATOR
#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR OMP_GET_DEVICE_AND_HOST_ALLOCATOR
#define FTN_GET_DEVICES_ALL_ALLOCATOR OMP_GET_DEVICES_ALL_ALLOCATOR
#define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES
#define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE
#define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM
#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT
#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT
@ -534,6 +570,18 @@
#define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR_
#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR_
#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_
/* OpenMP 6.0 (TR11) memory management routines: upper-case symbol names with
   a trailing underscore. */
#define FTN_GET_DEVICES_MEMSPACE OMP_GET_DEVICES_MEMSPACE_
#define FTN_GET_DEVICE_MEMSPACE OMP_GET_DEVICE_MEMSPACE_
#define FTN_GET_DEVICES_AND_HOST_MEMSPACE OMP_GET_DEVICES_AND_HOST_MEMSPACE_
#define FTN_GET_DEVICE_AND_HOST_MEMSPACE OMP_GET_DEVICE_AND_HOST_MEMSPACE_
#define FTN_GET_DEVICES_ALL_MEMSPACE OMP_GET_DEVICES_ALL_MEMSPACE_
#define FTN_GET_DEVICES_ALLOCATOR OMP_GET_DEVICES_ALLOCATOR_
#define FTN_GET_DEVICE_ALLOCATOR OMP_GET_DEVICE_ALLOCATOR_
#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR OMP_GET_DEVICES_AND_HOST_ALLOCATOR_
#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR OMP_GET_DEVICE_AND_HOST_ALLOCATOR_
#define FTN_GET_DEVICES_ALL_ALLOCATOR OMP_GET_DEVICES_ALL_ALLOCATOR_
#define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES_
#define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE_
#define FTN_ALLOC OMP_ALLOC_
#define FTN_FREE OMP_FREE_
#define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_

View File

@ -324,8 +324,9 @@ omp_allocator_handle_t const kmp_max_mem_alloc =
(omp_allocator_handle_t const)1024;
omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc;
omp_memspace_handle_t const omp_null_mem_space = (omp_memspace_handle_t const)0;
omp_memspace_handle_t const omp_default_mem_space =
(omp_memspace_handle_t const)0;
(omp_memspace_handle_t const)99;
omp_memspace_handle_t const omp_large_cap_mem_space =
(omp_memspace_handle_t const)1;
omp_memspace_handle_t const omp_const_mem_space =
@ -340,6 +341,8 @@ omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
(omp_memspace_handle_t const)101;
omp_memspace_handle_t const llvm_omp_target_device_mem_space =
(omp_memspace_handle_t const)102;
omp_memspace_handle_t const kmp_max_mem_space =
(omp_memspace_handle_t const)1024;
/* This check ensures that the compiler is passing the correct data type for the
flags formal parameter of the function kmpc_omp_task_alloc(). If the type is

View File

@ -579,7 +579,10 @@ static void __kmp_init_allocator() {
__kmp_init_memkind();
__kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
static void __kmp_fini_allocator() {
__kmp_fini_target_mem();
__kmp_fini_memkind();
}
/* ------------------------------------------------------------------------ */

View File

@ -357,8 +357,9 @@ omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
(omp_allocator_handle_t const)102;
omp_memspace_handle_t const omp_null_mem_space = (omp_memspace_handle_t const)0;
omp_memspace_handle_t const omp_default_mem_space =
(omp_memspace_handle_t const)0;
(omp_memspace_handle_t const)99;
omp_memspace_handle_t const omp_large_cap_mem_space =
(omp_memspace_handle_t const)1;
omp_memspace_handle_t const omp_const_mem_space =

View File

@ -0,0 +1,228 @@
// RUN: %libomp-compile -Wl,--export-dynamic && %libomp-run
// REQUIRES: linux
// Test OpenMP 6.0 memory management routines.
// Test host runtime's basic support with an emulated offload runtime.
#include <stdlib.h>
#include <omp.h>
#define NUM_DEVICES 4
//
// Required offload runtime interfaces
//
// Emulated backend: report a fixed number of devices.
extern int __tgt_get_num_devices(void) {
  const int device_count = NUM_DEVICES;
  return device_count;
}
// Emulated backend: one resource per requested device. When `resources` is
// non-NULL it receives the resource IDs; the count is returned either way.
// `host` and `memspace` are not used by this emulation.
extern int __tgt_get_mem_resources(int num_devices, const int *devices,
                                   int host, omp_memspace_handle_t memspace,
                                   int *resources) {
  // We expect valid inputs within this test.
  if (resources != NULL) {
    // Simple resource ID mapping example in the backend (=device ID).
    // This does not represent any real backend.
    int idx = 0;
    while (idx < num_devices) {
      resources[idx] = devices[idx];
      idx++;
    }
  }
  return num_devices;
}
// Emulated backend allocation: plain host malloc; `allocator` is ignored.
extern void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  void *block = malloc(size);
  return block;
}
// Emulated backend deallocation: plain host free; `allocator` is ignored.
extern void __tgt_omp_free(void *ptr, omp_allocator_handle_t allocator) {
  free(ptr);
  return;
}
// Code above is also used by the corresponding Fortran test
// Return EXIT_FAILURE from the enclosing function when Expr is false.
#define CHECK_OR_RET_FAIL(Expr)                                                \
  do {                                                                         \
    if (!(Expr)) {                                                             \
      return EXIT_FAILURE;                                                     \
    }                                                                          \
  } while (0)
// Test user-initialized allocator with the given memory space
// Initialize an allocator on the given memory space, allocate and free one
// block through it, then destroy it.
// Returns EXIT_SUCCESS if both the allocator and the allocation are non-null.
static int test_user_allocator(omp_memspace_handle_t ms) {
  omp_allocator_handle_t al = omp_init_allocator(ms, 0, NULL);
  CHECK_OR_RET_FAIL(al != omp_null_allocator);
  void *m = omp_alloc(1024, al);
  if (m == NULL) {
    // Do not leave the allocator behind when the allocation fails.
    omp_destroy_allocator(al);
    return EXIT_FAILURE;
  }
  omp_free(m, al);
  omp_destroy_allocator(al);
  return EXIT_SUCCESS;
}
// Allocate and free one block through `al`, then destroy `al`.
// The allocator is destroyed on both the success and the failure path.
// Returns EXIT_SUCCESS if the allocation is non-null.
static int test_allocator(omp_allocator_handle_t al) {
  int rc = EXIT_FAILURE;
  void *m = omp_alloc(1024, al);
  if (m != NULL) {
    omp_free(m, al);
    rc = EXIT_SUCCESS;
  }
  omp_destroy_allocator(al);
  return rc;
}
// Checks behind test_mem_space(); `all_devices` holds device IDs
// 0..num_devices-1 and is owned by the caller.
static int check_device_mem_spaces(const int *all_devices, int num_devices) {
  int i, count;
  omp_memspace_handle_t predef = omp_default_mem_space;
  omp_memspace_handle_t ms1 = omp_null_mem_space;
  omp_memspace_handle_t ms2 = omp_null_mem_space;
  // Test the following API routines.
  // * omp_get_device_memspace
  // * omp_get_device_and_host_memspace
  // * omp_get_devices_memspace
  // * omp_get_devices_and_host_memspace
  // Test if runtime returns the same memory space handle for the same input.
  // Test if we can use the memory space to initialize allocator.
  for (i = 0; i < num_devices; i++) {
    ms1 = omp_get_device_memspace(i, predef);
    CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
    ms2 = omp_get_device_memspace(i, predef);
    CHECK_OR_RET_FAIL(ms1 == ms2);
    CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
    ms1 = ms2 = omp_null_mem_space;
    ms1 = omp_get_device_and_host_memspace(i, predef);
    CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
    ms2 = omp_get_device_and_host_memspace(i, predef);
    CHECK_OR_RET_FAIL(ms1 == ms2);
    CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
    ms1 = ms2 = omp_null_mem_space;
    // Every contiguous device range starting at device i.
    for (count = 1; i + count <= num_devices; count++) {
      const int *devices = &all_devices[i];
      ms1 = omp_get_devices_memspace(count, devices, predef);
      CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
      ms2 = omp_get_devices_memspace(count, devices, predef);
      CHECK_OR_RET_FAIL(ms1 == ms2);
      CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
      ms1 = ms2 = omp_null_mem_space;
      ms1 = omp_get_devices_and_host_memspace(count, devices, predef);
      CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
      ms2 = omp_get_devices_and_host_memspace(count, devices, predef);
      CHECK_OR_RET_FAIL(ms1 == ms2);
      CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
      ms1 = ms2 = omp_null_mem_space;
    }
  }
  // Test the following API routines.
  // * omp_get_devices_all_memspace
  // Test if runtime returns the same memory space handle for the same input.
  ms1 = omp_get_devices_all_memspace(predef);
  CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
  ms2 = omp_get_devices_all_memspace(predef);
  CHECK_OR_RET_FAIL(ms1 == ms2);
  return EXIT_SUCCESS;
}

// Test the memory space retrieval routines.
// Owns the device ID list so that it is released on every exit path.
static int test_mem_space(void) {
  int i;
  int num_devices = omp_get_num_devices();
  CHECK_OR_RET_FAIL(num_devices == NUM_DEVICES);
  int *all_devices = (int *)malloc(sizeof(int) * num_devices);
  CHECK_OR_RET_FAIL(all_devices != NULL);
  for (i = 0; i < num_devices; i++)
    all_devices[i] = i;
  int rc = check_device_mem_spaces(all_devices, num_devices);
  free(all_devices);
  return rc;
}
// Checks behind test_mem_allocator(); `all_devices` holds device IDs
// 0..num_devices-1 and is owned by the caller.
static int check_device_allocators(const int *all_devices, int num_devices) {
  int i, count;
  omp_memspace_handle_t predef = omp_default_mem_space;
  omp_allocator_handle_t al = omp_null_allocator;
  // Test the following API routines.
  // * omp_get_device_allocator
  // * omp_get_device_and_host_allocator
  // * omp_get_devices_allocator
  // * omp_get_devices_and_host_allocator
  for (i = 0; i < num_devices; i++) {
    al = omp_get_device_allocator(i, predef);
    CHECK_OR_RET_FAIL(al != omp_null_allocator);
    CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
    al = omp_null_allocator;
    al = omp_get_device_and_host_allocator(i, predef);
    CHECK_OR_RET_FAIL(al != omp_null_allocator);
    CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
    al = omp_null_allocator;
    // Every contiguous device range starting at device i.
    for (count = 1; i + count <= num_devices; count++) {
      const int *devices = &all_devices[i];
      al = omp_get_devices_allocator(count, devices, predef);
      CHECK_OR_RET_FAIL(al != omp_null_allocator);
      CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
      al = omp_null_allocator;
      al = omp_get_devices_and_host_allocator(count, devices, predef);
      CHECK_OR_RET_FAIL(al != omp_null_allocator);
      CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
      al = omp_null_allocator;
    }
  }
  // Test the following API routines.
  // * omp_get_devices_all_allocator
  al = omp_get_devices_all_allocator(predef);
  CHECK_OR_RET_FAIL(al != omp_null_allocator);
  CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
  return EXIT_SUCCESS;
}

// Test the allocator retrieval routines.
// Owns the device ID list so that it is released on every exit path.
static int test_mem_allocator(void) {
  int i;
  int num_devices = omp_get_num_devices();
  CHECK_OR_RET_FAIL(num_devices == NUM_DEVICES);
  int *all_devices = (int *)malloc(sizeof(int) * num_devices);
  CHECK_OR_RET_FAIL(all_devices != NULL);
  for (i = 0; i < num_devices; i++)
    all_devices[i] = i;
  int rc = check_device_allocators(all_devices, num_devices);
  free(all_devices);
  return rc;
}
// Just test what we can expect from the emulated backend.
// Test omp_get_memspace_num_resources and omp_get_submemspace against the
// all-devices memory space of the emulated backend.
static int test_sub_mem_space(void) {
  int i;
  omp_memspace_handle_t ms = omp_null_mem_space;
  ms = omp_get_devices_all_memspace(omp_default_mem_space);
  CHECK_OR_RET_FAIL(ms != omp_null_mem_space);
  int num_resources = omp_get_memspace_num_resources(ms);
  CHECK_OR_RET_FAIL(num_resources == NUM_DEVICES);
  // Check if single-resource sub memspace is correctly returned.
  for (i = 0; i < num_resources; i++) {
    omp_memspace_handle_t sub = omp_get_submemspace(ms, 1, &i);
    CHECK_OR_RET_FAIL(sub != omp_null_mem_space);
    CHECK_OR_RET_FAIL(sub != ms);
    int num_sub_resources = omp_get_memspace_num_resources(sub);
    CHECK_OR_RET_FAIL(num_sub_resources == 1);
  }
  // Check if all-resource sub memspace is correctly returned.
  int *resources = (int *)malloc(sizeof(int) * num_resources);
  CHECK_OR_RET_FAIL(resources != NULL);
  for (i = 0; i < num_resources; i++)
    resources[i] = i;
  omp_memspace_handle_t sub = omp_get_submemspace(ms, num_resources, resources);
  // The ID list is only an input to the call; release it before the checks so
  // that no exit path leaks it.
  free(resources);
  CHECK_OR_RET_FAIL(sub != omp_null_mem_space);
  CHECK_OR_RET_FAIL(sub == ms);
  return EXIT_SUCCESS;
}
// Run the three test groups in order; stop at the first failure.
int main() {
  if (test_mem_space() != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (test_mem_allocator() != EXIT_SUCCESS)
    return EXIT_FAILURE;
  if (test_sub_mem_space() != EXIT_SUCCESS)
    return EXIT_FAILURE;
  return EXIT_SUCCESS;
}