mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 07:16:05 +00:00

For OpenMP target regions to piggy back on the CUDA/AMDGPU/... implementation of math functions, we include the appropriate definitions inside of an `omp begin/end declare variant match(device={arch(nvptx)})` scope. This way, the vendor specific math functions will become specialized versions of the system math functions. When a system math function is called and specialized version is available the selection logic introduced in D75779 instead call the specialized version. In contrast to the code path we used so far, the system header is actually included. This means functions without specialized versions are available and so are macro definitions. This should address PR42061, PR42798, and PR42799. Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D75788
471 lines
16 KiB
C++
471 lines
16 KiB
C++
/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
|
|
*
|
|
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
* See https://llvm.org/LICENSE.txt for license information.
|
|
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
*
|
|
*===-----------------------------------------------------------------------===
|
|
*/
|
|
#ifndef __CLANG_CUDA_CMATH_H__
|
|
#define __CLANG_CUDA_CMATH_H__
|
|
#ifndef __CUDA__
|
|
#error "This file is for CUDA compilation only."
|
|
#endif
|
|
|
|
#ifndef _OPENMP
|
|
#include <limits>
|
|
#endif
|
|
|
|
// CUDA lets us use various std math functions on the device side. This file
|
|
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
|
|
//
|
|
// Specifically, the forward-declares header declares __device__ overloads for
|
|
// these functions in the global namespace, then pulls them into namespace std
|
|
// with 'using' statements. Then this file implements those functions, after
|
|
// their implementations have been pulled in.
|
|
//
|
|
// It's important that we declare the functions in the global namespace and pull
|
|
// them into namespace std with using statements, as opposed to simply declaring
|
|
// these functions in namespace std, because our device functions need to
|
|
// overload the standard library functions, which may be declared in the global
|
|
// namespace or in std, depending on the degree of conformance of the stdlib
|
|
// implementation. Declaring in the global namespace and pulling into namespace
|
|
// std covers all of the known knowns.
|
|
|
|
#ifdef _OPENMP
|
|
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
|
|
#else
|
|
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
|
|
#endif
|
|
|
|
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
|
|
__DEVICE__ long abs(long __n) { return ::labs(__n); }
|
|
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
|
|
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
|
|
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
|
|
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
|
|
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
|
|
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
|
|
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
|
|
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
|
|
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
|
|
__DEVICE__ float exp(float __x) { return ::expf(__x); }
|
|
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
|
|
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
|
|
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
|
|
__DEVICE__ int fpclassify(float __x) {
|
|
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
|
FP_ZERO, __x);
|
|
}
|
|
__DEVICE__ int fpclassify(double __x) {
|
|
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
|
FP_ZERO, __x);
|
|
}
|
|
__DEVICE__ float frexp(float __arg, int *__exp) {
|
|
return ::frexpf(__arg, __exp);
|
|
}
|
|
|
|
// For inscrutable reasons, the CUDA headers define these functions for us on
|
|
// Windows. For OpenMP we omit these as some old system headers have
|
|
// non-conforming `isinf(float)` and `isnan(float)` implementations that return
|
|
// an `int`. The system versions of these functions should be fine anyway.
|
|
#if !defined(_MSC_VER) && !defined(_OPENMP)
|
|
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
|
|
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
|
|
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
|
|
// For inscrutable reasons, __finite(), the double-precision version of
|
|
// __finitef, does not exist when compiling for MacOS. __isfinited is available
|
|
// everywhere and is just as good.
|
|
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
|
|
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
|
|
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
|
|
#endif
|
|
|
|
__DEVICE__ bool isgreater(float __x, float __y) {
|
|
return __builtin_isgreater(__x, __y);
|
|
}
|
|
__DEVICE__ bool isgreater(double __x, double __y) {
|
|
return __builtin_isgreater(__x, __y);
|
|
}
|
|
__DEVICE__ bool isgreaterequal(float __x, float __y) {
|
|
return __builtin_isgreaterequal(__x, __y);
|
|
}
|
|
__DEVICE__ bool isgreaterequal(double __x, double __y) {
|
|
return __builtin_isgreaterequal(__x, __y);
|
|
}
|
|
__DEVICE__ bool isless(float __x, float __y) {
|
|
return __builtin_isless(__x, __y);
|
|
}
|
|
__DEVICE__ bool isless(double __x, double __y) {
|
|
return __builtin_isless(__x, __y);
|
|
}
|
|
__DEVICE__ bool islessequal(float __x, float __y) {
|
|
return __builtin_islessequal(__x, __y);
|
|
}
|
|
__DEVICE__ bool islessequal(double __x, double __y) {
|
|
return __builtin_islessequal(__x, __y);
|
|
}
|
|
__DEVICE__ bool islessgreater(float __x, float __y) {
|
|
return __builtin_islessgreater(__x, __y);
|
|
}
|
|
__DEVICE__ bool islessgreater(double __x, double __y) {
|
|
return __builtin_islessgreater(__x, __y);
|
|
}
|
|
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
|
|
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
|
|
__DEVICE__ bool isunordered(float __x, float __y) {
|
|
return __builtin_isunordered(__x, __y);
|
|
}
|
|
__DEVICE__ bool isunordered(double __x, double __y) {
|
|
return __builtin_isunordered(__x, __y);
|
|
}
|
|
__DEVICE__ float ldexp(float __arg, int __exp) {
|
|
return ::ldexpf(__arg, __exp);
|
|
}
|
|
__DEVICE__ float log(float __x) { return ::logf(__x); }
|
|
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
|
|
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
|
|
__DEVICE__ float pow(float __base, float __exp) {
|
|
return ::powf(__base, __exp);
|
|
}
|
|
__DEVICE__ float pow(float __base, int __iexp) {
|
|
return ::powif(__base, __iexp);
|
|
}
|
|
__DEVICE__ double pow(double __base, int __iexp) {
|
|
return ::powi(__base, __iexp);
|
|
}
|
|
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
|
|
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
|
|
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
|
|
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
|
|
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
|
|
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
|
|
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
|
|
|
|
// Notably missing above is nexttoward. We omit it because
|
|
// libdevice doesn't provide an implementation, and we don't want to be in the
|
|
// business of implementing tricky libm functions in this header.
|
|
|
|
#ifndef _OPENMP
|
|
|
|
// Now we've defined everything we promised we'd define in
|
|
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
|
|
// fix up our math functions.
|
|
//
|
|
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
|
|
// only sin(float) and sin(double), which means that e.g. sin(0) is
|
|
// ambiguous.
|
|
//
|
|
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
|
|
// std. These are defined in the CUDA headers in the global namespace,
|
|
// independent of everything else we've done here.
|
|
|
|
// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
|
|
// we go ahead and unconditionally define functions that are only available when
|
|
// compiling for C++11 to match the behavior of the CUDA headers.
|
|
template<bool __B, class __T = void>
|
|
struct __clang_cuda_enable_if {};
|
|
|
|
template <class __T> struct __clang_cuda_enable_if<true, __T> {
|
|
typedef __T type;
|
|
};
|
|
|
|
// Defines an overload of __fn that accepts one integral argument, calls
|
|
// __fn((double)x), and returns __retty.
|
|
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
|
|
template <typename __T> \
|
|
__DEVICE__ \
|
|
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
|
|
__retty>::type \
|
|
__fn(__T __x) { \
|
|
return ::__fn((double)__x); \
|
|
}
|
|
|
|
// Defines an overload of __fn that accepts one two arithmetic arguments, calls
|
|
// __fn((double)x, (double)y), and returns a double.
|
|
//
|
|
// Note this is different from OVERLOAD_1, which generates an overload that
|
|
// accepts only *integral* arguments.
|
|
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
|
|
template <typename __T1, typename __T2> \
|
|
__DEVICE__ typename __clang_cuda_enable_if< \
|
|
std::numeric_limits<__T1>::is_specialized && \
|
|
std::numeric_limits<__T2>::is_specialized, \
|
|
__retty>::type \
|
|
__fn(__T1 __x, __T2 __y) { \
|
|
return __fn((double)__x, (double)__y); \
|
|
}
|
|
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
|
|
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
|
|
|
|
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
|
|
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
|
|
|
|
// Overloads for functions that don't match the patterns expected by
|
|
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
|
|
template <typename __T1, typename __T2, typename __T3>
|
|
__DEVICE__ typename __clang_cuda_enable_if<
|
|
std::numeric_limits<__T1>::is_specialized &&
|
|
std::numeric_limits<__T2>::is_specialized &&
|
|
std::numeric_limits<__T3>::is_specialized,
|
|
double>::type
|
|
fma(__T1 __x, __T2 __y, __T3 __z) {
|
|
return std::fma((double)__x, (double)__y, (double)__z);
|
|
}
|
|
|
|
template <typename __T>
|
|
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
|
double>::type
|
|
frexp(__T __x, int *__exp) {
|
|
return std::frexp((double)__x, __exp);
|
|
}
|
|
|
|
template <typename __T>
|
|
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
|
double>::type
|
|
ldexp(__T __x, int __exp) {
|
|
return std::ldexp((double)__x, __exp);
|
|
}
|
|
|
|
template <typename __T1, typename __T2>
|
|
__DEVICE__ typename __clang_cuda_enable_if<
|
|
std::numeric_limits<__T1>::is_specialized &&
|
|
std::numeric_limits<__T2>::is_specialized,
|
|
double>::type
|
|
remquo(__T1 __x, __T2 __y, int *__quo) {
|
|
return std::remquo((double)__x, (double)__y, __quo);
|
|
}
|
|
|
|
template <typename __T>
|
|
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
|
double>::type
|
|
scalbln(__T __x, long __exp) {
|
|
return std::scalbln((double)__x, __exp);
|
|
}
|
|
|
|
template <typename __T>
|
|
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
|
double>::type
|
|
scalbn(__T __x, int __exp) {
|
|
return std::scalbn((double)__x, __exp);
|
|
}
|
|
|
|
// We need to define these overloads in exactly the namespace our standard
|
|
// library uses (including the right inline namespace), otherwise they won't be
|
|
// picked up by other functions in the standard library (e.g. functions in
|
|
// <complex>). Thus the ugliness below.
|
|
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
#else
|
|
namespace std {
|
|
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|
#endif
|
|
#endif
|
|
|
|
// Pull the new overloads we defined above into namespace std.
|
|
using ::acos;
|
|
using ::acosh;
|
|
using ::asin;
|
|
using ::asinh;
|
|
using ::atan;
|
|
using ::atan2;
|
|
using ::atanh;
|
|
using ::cbrt;
|
|
using ::ceil;
|
|
using ::copysign;
|
|
using ::cos;
|
|
using ::cosh;
|
|
using ::erf;
|
|
using ::erfc;
|
|
using ::exp;
|
|
using ::exp2;
|
|
using ::expm1;
|
|
using ::fabs;
|
|
using ::fdim;
|
|
using ::floor;
|
|
using ::fma;
|
|
using ::fmax;
|
|
using ::fmin;
|
|
using ::fmod;
|
|
using ::fpclassify;
|
|
using ::frexp;
|
|
using ::hypot;
|
|
using ::ilogb;
|
|
using ::isfinite;
|
|
using ::isgreater;
|
|
using ::isgreaterequal;
|
|
using ::isless;
|
|
using ::islessequal;
|
|
using ::islessgreater;
|
|
using ::isnormal;
|
|
using ::isunordered;
|
|
using ::ldexp;
|
|
using ::lgamma;
|
|
using ::llrint;
|
|
using ::llround;
|
|
using ::log;
|
|
using ::log10;
|
|
using ::log1p;
|
|
using ::log2;
|
|
using ::logb;
|
|
using ::lrint;
|
|
using ::lround;
|
|
using ::nearbyint;
|
|
using ::nextafter;
|
|
using ::pow;
|
|
using ::remainder;
|
|
using ::remquo;
|
|
using ::rint;
|
|
using ::round;
|
|
using ::scalbln;
|
|
using ::scalbn;
|
|
using ::signbit;
|
|
using ::sin;
|
|
using ::sinh;
|
|
using ::sqrt;
|
|
using ::tan;
|
|
using ::tanh;
|
|
using ::tgamma;
|
|
using ::trunc;
|
|
|
|
// Well this is fun: We need to pull these symbols in for libc++, but we can't
|
|
// pull them in with libstdc++, because its ::isinf and ::isnan are different
|
|
// than its std::isinf and std::isnan.
|
|
#ifndef __GLIBCXX__
|
|
using ::isinf;
|
|
using ::isnan;
|
|
#endif
|
|
|
|
// Finally, pull the "foobarf" functions that CUDA defines in its headers into
|
|
// namespace std.
|
|
using ::acosf;
|
|
using ::acoshf;
|
|
using ::asinf;
|
|
using ::asinhf;
|
|
using ::atan2f;
|
|
using ::atanf;
|
|
using ::atanhf;
|
|
using ::cbrtf;
|
|
using ::ceilf;
|
|
using ::copysignf;
|
|
using ::cosf;
|
|
using ::coshf;
|
|
using ::erfcf;
|
|
using ::erff;
|
|
using ::exp2f;
|
|
using ::expf;
|
|
using ::expm1f;
|
|
using ::fabsf;
|
|
using ::fdimf;
|
|
using ::floorf;
|
|
using ::fmaf;
|
|
using ::fmaxf;
|
|
using ::fminf;
|
|
using ::fmodf;
|
|
using ::frexpf;
|
|
using ::hypotf;
|
|
using ::ilogbf;
|
|
using ::ldexpf;
|
|
using ::lgammaf;
|
|
using ::llrintf;
|
|
using ::llroundf;
|
|
using ::log10f;
|
|
using ::log1pf;
|
|
using ::log2f;
|
|
using ::logbf;
|
|
using ::logf;
|
|
using ::lrintf;
|
|
using ::lroundf;
|
|
using ::modff;
|
|
using ::nearbyintf;
|
|
using ::nextafterf;
|
|
using ::powf;
|
|
using ::remainderf;
|
|
using ::remquof;
|
|
using ::rintf;
|
|
using ::roundf;
|
|
using ::scalblnf;
|
|
using ::scalbnf;
|
|
using ::sinf;
|
|
using ::sinhf;
|
|
using ::sqrtf;
|
|
using ::tanf;
|
|
using ::tanhf;
|
|
using ::tgammaf;
|
|
using ::truncf;
|
|
|
|
#ifdef _LIBCPP_END_NAMESPACE_STD
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
#else
|
|
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|
_GLIBCXX_END_NAMESPACE_VERSION
|
|
#endif
|
|
} // namespace std
|
|
#endif
|
|
|
|
#endif // _OPENMP
|
|
|
|
#undef __DEVICE__
|
|
|
|
#endif
|