mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 00:46:06 +00:00

Add option and statement attribute for controlling emitting of target-specific metadata to atomicrmw instructions in IR. The RFC for this attribute and option is https://discourse.llvm.org/t/rfc-add-clang-atomic-control-options-and-pragmas/80641, Originally a pragma was proposed, then it was changed to clang attribute. This attribute allows users to specify one, two, or all three options and must be applied to a compound statement. The attribute can also be nested, with inner attributes overriding the options specified by outer attributes or the target's default options. These options will then determine the target-specific metadata added to atomic instructions in the IR. In addition to the attribute, three new compiler options are introduced: `-f[no-]atomic-remote-memory`, `-f[no-]atomic-fine-grained-memory`, `-f[no-]atomic-ignore-denormal-mode`. These compiler options allow users to override the default options through the Clang driver and front end. `-m[no-]unsafe-fp-atomics` is aliased to `-f[no-]ignore-denormal-mode`. In terms of implementation, the atomic attribute is represented in the AST by the existing AttributedStmt, with minimal changes to AST and Sema. During code generation in Clang, the CodeGenModule maintains the current atomic options, which are used to emit the relevant metadata for atomic instructions. RAII is used to manage the saving and restoring of atomic options when entering and exiting nested AttributedStmt.
466 lines
29 KiB
Plaintext
466 lines
29 KiB
Plaintext
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
|
|
// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
|
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
|
|
// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=DEV %s
|
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
|
|
// RUN: -fatomic-fine-grained-memory -fatomic-ignore-denormal-mode \
|
|
// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=OPT %s
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// HOST-LABEL: define dso_local void @_Z12test_defaultPf(
|
|
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// HOST-NEXT: [[ENTRY:.*:]]
|
|
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
|
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
|
|
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: ret void
|
|
//
|
|
// DEV-LABEL: define dso_local void @_Z12test_defaultPf(
|
|
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// DEV-NEXT: [[ENTRY:.*:]]
|
|
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: ret void
|
|
//
|
|
// OPT-LABEL: define dso_local void @_Z12test_defaultPf(
|
|
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// OPT-NEXT: [[ENTRY:.*:]]
|
|
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]]
|
|
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: ret void
|
|
//
|
|
__device__ __host__ void test_default(float *a) {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
}
|
|
|
|
// HOST-LABEL: define dso_local void @_Z8test_onePf(
|
|
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// HOST-NEXT: [[ENTRY:.*:]]
|
|
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
|
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
|
|
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: ret void
|
|
//
|
|
// DEV-LABEL: define dso_local void @_Z8test_onePf(
|
|
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// DEV-NEXT: [[ENTRY:.*:]]
|
|
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: ret void
|
|
//
|
|
// OPT-LABEL: define dso_local void @_Z8test_onePf(
|
|
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// OPT-NEXT: [[ENTRY:.*:]]
|
|
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
|
|
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: ret void
|
|
//
|
|
__device__ __host__ void test_one(float *a) {
|
|
[[clang::atomic(no_remote_memory)]] {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
}
|
|
}
|
|
|
|
// HOST-LABEL: define dso_local void @_Z8test_twoPf(
|
|
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// HOST-NEXT: [[ENTRY:.*:]]
|
|
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
|
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
|
|
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: ret void
|
|
//
|
|
// DEV-LABEL: define dso_local void @_Z8test_twoPf(
|
|
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// DEV-NEXT: [[ENTRY:.*:]]
|
|
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
|
|
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: ret void
|
|
//
|
|
// OPT-LABEL: define dso_local void @_Z8test_twoPf(
|
|
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// OPT-NEXT: [[ENTRY:.*:]]
|
|
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]]
|
|
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: ret void
|
|
//
|
|
__device__ __host__ void test_two(float *a) {
|
|
[[clang::atomic(remote_memory, ignore_denormal_mode)]] {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
}
|
|
}
|
|
|
|
// HOST-LABEL: define dso_local void @_Z10test_threePf(
|
|
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// HOST-NEXT: [[ENTRY:.*:]]
|
|
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
|
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
|
|
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: ret void
|
|
//
|
|
// DEV-LABEL: define dso_local void @_Z10test_threePf(
|
|
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// DEV-NEXT: [[ENTRY:.*:]]
|
|
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: ret void
|
|
//
|
|
// OPT-LABEL: define dso_local void @_Z10test_threePf(
|
|
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// OPT-NEXT: [[ENTRY:.*:]]
|
|
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]]
|
|
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: ret void
|
|
//
|
|
__device__ __host__ void test_three(float *a) {
|
|
[[clang::atomic(no_remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
}
|
|
}
|
|
|
|
// HOST-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
|
|
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// HOST-NEXT: [[ENTRY:.*:]]
|
|
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
|
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
|
|
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: ret void
|
|
//
|
|
// DEV-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
|
|
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// DEV-NEXT: [[ENTRY:.*:]]
|
|
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: ret void
|
|
//
|
|
// OPT-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
|
|
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// OPT-NEXT: [[ENTRY:.*:]]
|
|
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]]
|
|
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: ret void
|
|
//
|
|
__device__ __host__ void test_multiple_attrs(float *a) {
|
|
[[clang::atomic(no_remote_memory)]] [[clang::atomic(remote_memory)]] {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
}
|
|
}
|
|
|
|
// HOST-LABEL: define dso_local void @_Z11test_nestedPf(
|
|
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// HOST-NEXT: [[ENTRY:.*:]]
|
|
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
|
|
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4
|
|
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
|
|
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
|
|
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
|
|
// HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1]], align 4
|
|
// HOST-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1]], align 4
|
|
// HOST-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] seq_cst, align 4
|
|
// HOST-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2]], align 4
|
|
// HOST-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2]], align 4
|
|
// HOST-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3]], align 4
|
|
// HOST-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3]], align 4
|
|
// HOST-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] acquire, align 4
|
|
// HOST-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4]], align 4
|
|
// HOST-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4]], align 4
|
|
// HOST-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
|
|
// HOST-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5]], align 4
|
|
// HOST-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5]], align 4
|
|
// HOST-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] release, align 4
|
|
// HOST-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6]], align 4
|
|
// HOST-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6]], align 4
|
|
// HOST-NEXT: ret void
|
|
//
|
|
// DEV-LABEL: define dso_local void @_Z11test_nestedPf(
|
|
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// DEV-NEXT: [[ENTRY:.*:]]
|
|
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5)
|
|
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
|
|
// DEV-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
|
|
// DEV-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr
|
|
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4
|
|
// DEV-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// DEV-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
|
|
// DEV-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4
|
|
// DEV-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4
|
|
// DEV-NEXT: ret void
|
|
//
|
|
// OPT-LABEL: define dso_local void @_Z11test_nestedPf(
|
|
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
|
|
// OPT-NEXT: [[ENTRY:.*:]]
|
|
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5)
|
|
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
|
|
// OPT-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
|
|
// OPT-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr
|
|
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
|
|
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4
|
|
// OPT-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]]
|
|
// OPT-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
|
|
// OPT-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
|
|
// OPT-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4
|
|
// OPT-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4
|
|
// OPT-NEXT: ret void
|
|
//
|
|
__device__ __host__ void test_nested(float *a) {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
{
|
|
[[clang::atomic(remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
|
|
__scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
|
|
{
|
|
[[clang::atomic(no_remote_memory)]] {
|
|
__scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP);
|
|
}
|
|
}
|
|
{
|
|
[[clang::atomic(no_fine_grained_memory)]] {
|
|
__scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
//
|
|
//
|
|
//
|
|
template<typename T> __device__ __host__ void test_template(T *a) {
|
|
[[clang::atomic(no_remote_memory, fine_grained_memory)]] {
|
|
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
|
|
}
|
|
}
|
|
|
|
template __device__ __host__ void test_template<float>(float *a);
|
|
|
|
//.
|
|
// DEV: [[META4]] = !{}
|
|
//.
|
|
// OPT: [[META4]] = !{}
|
|
//.
|