[OpenMP] arm64_32 port for Apple WatchOS (#87246)

- Detect `aarch64_32` via the compiler-defined macro `__ARM64_ARCH_8_32__`.
- Reuse the ARM `__kmp_unnamed_critical_addr` definition and add the
  `KMP_PREFIX_UNDERSCORE` macro, as is done for AARCH64.
- Reuse the AARCH64 `__kmp_invoke_microtask` implementation.


Build log for watchOS (armv7k + arm64_32) and the watchOS simulator
(x86_64 + arm64):

https://github.com/nihui/action-protobuf/actions/runs/8520684611/job/23337305030
This commit is contained in:
nihui 2024-04-02 23:38:32 +08:00 committed by GitHub
parent 4c7de02bc0
commit c5bbdb6494
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 47 additions and 21 deletions

View File

@ -141,7 +141,7 @@ Options for all Libraries
Options for ``libomp``
----------------------
**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|s390x``
**LIBOMP_ARCH** = ``aarch64|aarch64_32|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|s390x``
The default value for this option is chosen based on probing the compiler for
architecture macros (e.g., is ``__x86_64__`` predefined by compiler?).

View File

@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD})
# If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake
libomp_get_architecture(LIBOMP_DETECTED_ARCH)
set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING
"The architecture to build for (x86_64/i386/arm/ppc/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/ve/s390x/wasm32).")
"The architecture to build for (x86_64/i386/arm/ppc/ppc64/ppc64le/aarch64/aarch64_32/mic/mips/mips64/riscv64/loongarch64/ve/s390x/wasm32).")
# Should assertions be enabled? They are on by default.
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
"enable assertions?")
@ -55,6 +55,8 @@ else() # Part of LLVM build
set(LIBOMP_ARCH ppc64)
elseif(LIBOMP_NATIVE_ARCH MATCHES "powerpc")
set(LIBOMP_ARCH ppc)
elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64_32")
set(LIBOMP_ARCH aarch64_32)
elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64")
set(LIBOMP_ARCH aarch64)
elseif(LIBOMP_NATIVE_ARCH MATCHES "arm64")
@ -91,7 +93,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64")
endif()
endif()
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve s390x wasm32)
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve s390x wasm32)
set(LIBOMP_LIB_TYPE normal CACHE STRING
"Performance,Profiling,Stubs library (normal/profile/stubs)")
@ -167,6 +169,7 @@ set(IA32 FALSE)
set(INTEL64 FALSE)
set(ARM FALSE)
set(AARCH64 FALSE)
set(AARCH64_32 FALSE)
set(AARCH64_A64FX FALSE)
set(PPC64BE FALSE)
set(PPC64LE FALSE)
@ -196,6 +199,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "ppc64le") # PPC64LE architecture
set(PPC64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "aarch64") # AARCH64 architecture
set(AARCH64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_32") # AARCH64_32 architecture
set(AARCH64_32 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_a64fx") # AARCH64_A64FX architecture
set(AARCH64_A64FX TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "mic") # Intel(R) Many Integrated Core Architecture

View File

@ -35,6 +35,8 @@ function(libomp_get_architecture return_arch)
#error ARCHITECTURE=arm
#elif defined(__arm__) || defined(_M_ARM) || defined(_ARM)
#error ARCHITECTURE=arm
#elif defined(__ARM64_ARCH_8_32__)
#error ARCHITECTURE=aarch64_32
#elif defined(__aarch64__) || defined(_M_ARM64)
#error ARCHITECTURE=aarch64
#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)

View File

@ -101,6 +101,8 @@ function(libomp_get_legal_arch return_arch_string)
set(${return_arch_string} "PPC64LE" PARENT_SCOPE)
elseif(${AARCH64})
set(${return_arch_string} "AARCH64" PARENT_SCOPE)
elseif(${AARCH64_32})
set(${return_arch_string} "AARCH64_32" PARENT_SCOPE)
elseif(${AARCH64_A64FX})
set(${return_arch_string} "AARCH64_A64FX" PARENT_SCOPE)
elseif(${MIPS})

View File

@ -326,6 +326,7 @@ else()
(LIBOMP_ARCH STREQUAL i386) OR
# (LIBOMP_ARCH STREQUAL arm) OR
(LIBOMP_ARCH STREQUAL aarch64) OR
(LIBOMP_ARCH STREQUAL aarch64_32) OR
(LIBOMP_ARCH STREQUAL aarch64_a64fx) OR
(LIBOMP_ARCH STREQUAL ppc64le) OR
(LIBOMP_ARCH STREQUAL ppc64) OR

View File

@ -358,7 +358,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
// (IA-32 architecture) or 64-bit signed (Intel(R) 64).
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \
KMP_ARCH_PPC
KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4

View File

@ -179,7 +179,7 @@ typedef unsigned long long kmp_uint64;
#endif /* KMP_OS_UNIX */
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \
KMP_ARCH_PPC
KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
@ -1051,7 +1051,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
#if KMP_OS_WINDOWS
#undef KMP_MB
#define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)

View File

@ -105,6 +105,7 @@
#define KMP_ARCH_X86 0
#define KMP_ARCH_X86_64 0
#define KMP_ARCH_AARCH64 0
#define KMP_ARCH_AARCH64_32 0
#define KMP_ARCH_PPC64_ELFv1 0
#define KMP_ARCH_PPC64_ELFv2 0
#define KMP_ARCH_PPC64_XCOFF 0
@ -157,6 +158,9 @@
#define KMP_ARCH_PPC_XCOFF 1
#undef KMP_ARCH_PPC
#define KMP_ARCH_PPC 1
#elif defined __ARM64_ARCH_8_32__
#undef KMP_ARCH_AARCH64_32
#define KMP_ARCH_AARCH64_32 1
#elif defined __aarch64__
#undef KMP_ARCH_AARCH64
#define KMP_ARCH_AARCH64 1
@ -244,7 +248,7 @@
/* Specify 32 bit architectures here */
#define KMP_32_BIT_ARCH \
(KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \
KMP_ARCH_PPC)
KMP_ARCH_PPC || KMP_ARCH_AARCH64_32)
// Platforms which support Intel(R) Many Integrated Core Architecture
#define KMP_MIC_SUPPORTED \
@ -254,7 +258,8 @@
#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \
KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE + \
KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC)
KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC + \
KMP_ARCH_AARCH64_32)
#error Unknown or unsupported architecture
#endif

View File

@ -8926,7 +8926,7 @@ __kmp_determine_reduction_method(
// KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
KMP_ARCH_WASM || KMP_ARCH_PPC
KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \

View File

@ -108,7 +108,7 @@ KMP_PREFIX_UNDERSCORE(\proc):
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
@ -176,7 +176,7 @@ KMP_PREFIX_UNDERSCORE(\proc):
.endm
# endif // KMP_OS_DARWIN
#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
.macro COMMON name, size, align_power
#if KMP_OS_DARWIN
@ -1236,7 +1236,7 @@ KMP_LABEL(kmp_1_exit):
#endif /* KMP_ARCH_X86_64 */
// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
//------------------------------------------------------------------------
// int
@ -1360,7 +1360,7 @@ KMP_LABEL(kmp_1):
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 */
#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM
@ -1505,7 +1505,7 @@ KMP_LABEL(kmp_1):
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 */
#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */
#if KMP_ARCH_PPC64
@ -2405,18 +2405,21 @@ __kmp_invoke_microtask:
#endif /* KMP_ARCH_S390X */
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
.data
COMMON .gomp_critical_user_, 32, 3
.data
.align 4
.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
.global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
.4byte .gomp_critical_user_
#ifdef __ELF__
.size __kmp_unnamed_critical_addr,4
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4
#endif
#endif /* KMP_ARCH_ARM */
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \

View File

@ -2635,7 +2635,8 @@ finish: // Clean up and exit.
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF)
KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || \
KMP_ARCH_AARCH64_32)
// Because WebAssembly will use `call_indirect` to invoke the microtask and
// WebAssembly indirect calls check that the called signature is a precise

View File

@ -186,7 +186,7 @@ ompt_label_##id:
#define print_possible_return_addresses(addr) \
printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
((char *)addr) - 8, ((char *)addr) - 12)
#elif KMP_ARCH_AARCH64
#elif KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32
// On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
// store instruction (another 4 bytes long).
// FIXME: PR #65696 added a third possibility (12 byte offset) to make the

View File

@ -53,6 +53,8 @@ sub canon_arch($) {
$arch = "ppc64le";
} elsif ( $arch =~ m{\Appc64} ) {
$arch = "ppc64";
} elsif ( $arch =~ m{\Aaarch64_32} ) {
$arch = "aarch64_32";
} elsif ( $arch =~ m{\Aaarch64} ) {
$arch = "aarch64";
} elsif ( $arch =~ m{\Amic} ) {
@ -97,6 +99,7 @@ sub canon_mic_arch($) {
"32e" => "Intel(R) 64",
"arm" => "ARM",
"aarch64" => "AArch64",
"aarch64_32" => "AArch64_32",
"loongarch64" => "LoongArch64",
"mic" => "Intel(R) Many Integrated Core Architecture",
"mips" => "MIPS",
@ -222,6 +225,8 @@ sub target_options() {
$_host_arch = "ppc64le";
} elsif ( $hardware_platform eq "ppc64" ) {
$_host_arch = "ppc64";
} elsif ( $hardware_platform eq "aarch64_32" ) {
$_host_arch = "aarch64_32";
} elsif ( $hardware_platform eq "aarch64" ) {
$_host_arch = "aarch64";
} elsif ( $hardware_platform eq "mips64" ) {

View File

@ -150,6 +150,8 @@ if ( 0 ) {
$values{ hardware_platform } = "ppc64le";
} elsif ( $values{ machine } =~ m{\Appc64\z} ) {
$values{ hardware_platform } = "ppc64";
} elsif ( $values{ machine } =~ m{\Aaarch64_32\z} ) {
$values{ hardware_platform } = "aarch64_32";
} elsif ( $values{ machine } =~ m{\Aaarch64\z} ) {
$values{ hardware_platform } = "aarch64";
} elsif ( $values{ machine } =~ m{\Amips64\z} ) {