mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 15:06:06 +00:00

For OpenMP target regions to piggyback on the CUDA/AMDGPU/... implementation of math functions, we include the appropriate definitions inside an `omp begin/end declare variant match(device={arch(nvptx)})` scope. This way, the vendor-specific math functions become specialized versions of the system math functions. When a system math function is called and a specialized version is available, the selection logic introduced in D75779 calls the specialized version instead. In contrast to the code path we used so far, the system header is actually included. This means functions without specialized versions are available, and so are macro definitions. This should address PR42061, PR42798, and PR42799. Reviewed By: ye-luo. Differential Revision: https://reviews.llvm.org/D75788
27 lines
1.2 KiB
C++
27 lines
1.2 KiB
C++
// Test calling of device math functions.
///==========================================================================///

// REQUIRES: nvptx-registered-target

// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s

#include <cstdlib>
|
|
#include <cmath>
|
|
|
|
// Calls several math functions from inside an OpenMP target region so that
// the FileCheck directives below can verify the IR emitted for each call.
//
// Every directive expects a call to the named __nv_* function instead of the
// plain math function. FileCheck matches plain directives in order, so each
// statement must stay directly paired with (and ordered like) its directive.
//
// a1: input value for the math calls. Note that modf() stores the integral
//     part of its argument through &a1.
void test_sqrt(double a1) {
  #pragma omp target
  {
    // CHECK-YES: call double @__nv_sqrt(double
    double l1 = sqrt(a1);
    // CHECK-YES: call double @__nv_pow(double
    double l2 = pow(a1, a1);
    // CHECK-YES: call double @__nv_modf(double
    double l3 = modf(a1 + 3.5, &a1);
    // CHECK-YES: call double @__nv_fabs(double
    double l4 = fabs(a1);
    // The explicit int cast makes this call the integer abs; the directive
    // above it expects an i32 call.
    // CHECK-YES: call i32 @__nv_abs(i32
    double l5 = abs((int)a1);
  }
}
|