mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 19:16:05 +00:00

We should have done this for the f32/f64 case a long time ago. Now that codegen handles atomicrmw selection for the v2f16/v2bf16 case, start emitting it instead. This also upgrades the behavior to respect a volatile-qualified pointer, which was previously ignored (for the cases that don't have an explicit volatile argument).
22 lines
1.1 KiB
Plaintext
22 lines
1.1 KiB
Plaintext
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \
|
|
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
|
|
// RUN: -o - | FileCheck %s
|
|
|
|
// Minimal __device__ definition so this test compiles as HIP without
// pulling in the HIP headers.
#define __device__ __attribute__((device))
|
|
// Pointer into AMDGPU address space 3 (LDS — the "ds" in the builtin below).
typedef __attribute__((address_space(3))) float *LP;
|
|
|
|
// CHECK-LABEL: test_ds_atomic_add_f32
|
|
// CHECK: %[[ADDR_ADDR:.*]] = alloca ptr, align 8, addrspace(5)
|
|
// CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ADDR]] to ptr
|
|
// CHECK: store ptr %addr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8
|
|
// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load ptr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8
|
|
// CHECK: %[[AS_CAST:.*]] = addrspacecast ptr %[[ADDR_ADDR_ASCAST]] to ptr addrspace(3)
|
|
// CHECK: [[TMP2:%.+]] = load float, ptr %val.addr.ascast, align 4
|
|
// CHECK: [[TMP3:%.+]] = atomicrmw fadd ptr addrspace(3) %[[AS_CAST]], float [[TMP2]] monotonic, align 4
|
|
// CHECK: %4 = load ptr, ptr %rtn.ascast, align 8
|
|
// CHECK: store float [[TMP3]], ptr %4, align 4
|
|
// IR-check-only test: verifies that __builtin_amdgcn_ds_faddf lowers to an
// `atomicrmw fadd` on an addrspace(3) pointer (see the CHECK lines above).
// The function is never executed, so the write through the uninitialized
// `rtn` pointer is intentional and harmless here.
__device__ void test_ds_atomic_add_f32(float *addr, float val) {
|
|
float *rtn;
|
|
// Cast the generic pointer to LDS (address space 3) and do the f32 add;
// the three trailing 0 arguments are the builtin's ordering/scope/volatile
// immediates — presumably defaults, TODO confirm against the builtin's
// signature in BuiltinsAMDGPU.def.
*rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0);
|
|
}
|