mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-14 20:56:26 +00:00
[libclc] Move exp, exp2 and expm1 to the CLC library (#133932)
These all share the use of a common helper function so are handled in one go. These builtins are also now vectorized.
This commit is contained in:
parent
602d05fbe8
commit
f14ff59da7
@ -6,9 +6,15 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#ifndef __CLC_MATH_CLC_EXP_H__
|
||||
#define __CLC_MATH_CLC_EXP_H__
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n);
|
||||
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_exp
|
||||
|
||||
#endif
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_EXP_H__
|
20
libclc/clc/include/clc/math/clc_exp2.h
Normal file
20
libclc/clc/include/clc/math/clc_exp2.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_EXP2_H__
|
||||
#define __CLC_MATH_CLC_EXP2_H__
|
||||
|
||||
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_exp2
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_EXP2_H__
|
20
libclc/clc/include/clc/math/clc_exp_helper.h
Normal file
20
libclc/clc/include/clc/math/clc_exp_helper.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_EXP_HELPER
|
||||
#define __CLC_MATH_CLC_EXP_HELPER
|
||||
|
||||
#define __DOUBLE_ONLY
|
||||
#define __CLC_BODY <clc/math/clc_exp_helper.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __DOUBLE_ONLY
|
||||
|
||||
#endif // __CLC_MATH_CLC_EXP_HELPER
|
13
libclc/clc/include/clc/math/clc_exp_helper.inc
Normal file
13
libclc/clc/include/clc/math/clc_exp_helper.inc
Normal file
@ -0,0 +1,13 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Shared back end of the double-precision exp-family builtins (this header is
// instantiated with __DOUBLE_ONLY defined).
//
//   x      original argument; only used for the NaN and range checks
//   x_min  inputs below this bound yield 0.0
//   x_max  inputs above this bound yield +infinity
//   r      reduced argument fed to the Taylor polynomial for e^r
//   n      integer from the caller's reduction; the low 6 bits (n & 0x3f)
//          index the 2^(j/64) tables and n >> 6 is the power-of-two scale
//
// Returns x unchanged when x is NaN, unless one of the range checks above
// fires.
_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE __clc_exp_helper(__CLC_GENTYPE x,
|
||||
__CLC_GENTYPE x_min,
|
||||
__CLC_GENTYPE x_max,
|
||||
__CLC_GENTYPE r,
|
||||
__CLC_INTN n);
|
20
libclc/clc/include/clc/math/clc_expm1.h
Normal file
20
libclc/clc/include/clc/math/clc_expm1.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_EXPM1_H__
|
||||
#define __CLC_MATH_CLC_EXPM1_H__
|
||||
|
||||
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_expm1
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_EXPM1_H__
|
@ -32,7 +32,11 @@ math/clc_ceil.cl
|
||||
math/clc_copysign.cl
|
||||
math/clc_cospi.cl
|
||||
math/clc_ep_log.cl
|
||||
math/clc_exp.cl
|
||||
math/clc_exp10.cl
|
||||
math/clc_exp2.cl
|
||||
math/clc_expm1.cl
|
||||
math/clc_exp_helper.cl
|
||||
math/clc_fabs.cl
|
||||
math/clc_fma.cl
|
||||
math/clc_fmod.cl
|
||||
|
19
libclc/clc/lib/generic/math/clc_exp.cl
Normal file
19
libclc/clc/lib/generic/math/clc_exp.cl
Normal file
@ -0,0 +1,19 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_exp_helper.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
|
||||
#define __CLC_BODY <clc_exp.inc>
|
||||
#include <clc/math/gentype.inc>
|
76
libclc/clc/lib/generic/math/clc_exp.inc
Normal file
76
libclc/clc/lib/generic/math/clc_exp.inc
Normal file
@ -0,0 +1,76 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision exp(x).
//
// x is split as p*ln(2) + t with integer p; e^t is approximated from an even
// polynomial in t, and the result is scaled by 2^p by adjusting the exponent
// bits directly. Inputs outside [llim, ulim) are clamped to 0 / +inf and NaN
// is returned unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
  // ln(2) as a head/tail pair, plus 1/ln(2).
  const __CLC_GENTYPE ln2_head = 0x1.62e300p-1f;
  const __CLC_GENTYPE ln2_tail = 0x1.2fefa2p-17f;
  const __CLC_GENTYPE inv_ln2 = 0x1.715476p+0f;

  // Reduce x: bias x/ln(2) by half a unit away from zero before the integer
  // conversion.
  const __CLC_GENTYPE half_bias = x < 0.0f ? -0.5f : 0.5f;
  const __CLC_INTN p = __CLC_CONVERT_INTN(__clc_mad(x, inv_ln2, half_bias));
  const __CLC_GENTYPE fp = __CLC_CONVERT_GENTYPE(p);
  // fp * ln2_head is expected to be exact here (ln2_head has few set bits).
  const __CLC_GENTYPE hi = __clc_mad(fp, -ln2_head, x);
  const __CLC_GENTYPE lo = -fp * ln2_tail;

  // Evaluate the polynomial in t^2 by Horner steps, innermost term first.
  const __CLC_GENTYPE t = hi + lo;
  const __CLC_GENTYPE t2 = t * t;
  __CLC_GENTYPE poly = __clc_mad(t2, 0x1.637698p-25f, -0x1.bbd41cp-20f);
  poly = __clc_mad(t2, poly, 0x1.1566aap-14f);
  poly = __clc_mad(t2, poly, -0x1.6c16c2p-9f);
  poly = __clc_mad(t2, poly, 0x1.555556p-3f);
  const __CLC_GENTYPE v = __clc_mad(t2, -poly, t);

  const __CLC_GENTYPE y =
      1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);

  // Scale by 2^p: add p straight into the exponent field of y.
  __CLC_GENTYPE result = __CLC_AS_GENTYPE(__CLC_AS_INTN(y) + (p << 23));

  // ln(largest_normal) = 88.72283905206835305366
  const __CLC_GENTYPE upper_limit = 0x1.62e430p+6f;
  // ln(smallest_normal) = -87.33654475055310898657
  const __CLC_GENTYPE lower_limit = -0x1.5d589ep+6f;

  result = x < lower_limit ? 0.0f : result;
  result =
      x < upper_limit ? result : __CLC_AS_GENTYPE((__CLC_UINTN)0x7f800000);
  return __clc_isnan(x) ? x : result;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision exp(x): reduce x against ln(2)/64 and hand the reduced
// argument and the integer multiple to __clc_exp_helper.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
  const __CLC_GENTYPE lower_bound = -0x1.74910d52d3051p+9;     // -1075*ln(2)
  const __CLC_GENTYPE upper_bound = 0x1.62e42fefa39efp+9;      // 1024*ln(2)
  const __CLC_GENTYPE sixty_four_by_ln2 = 0x1.71547652b82fep+6; // 64/ln(2)
  const __CLC_GENTYPE ln2_by_64_head = 0x1.62e42fefa0000p-7;   // head ln(2)/64
  const __CLC_GENTYPE ln2_by_64_tail = 0x1.cf79abc9e3b39p-46;  // tail ln(2)/64

  const __CLC_INTN n = __CLC_CONVERT_INTN(x * sixty_four_by_ln2);
  const __CLC_GENTYPE fn = __CLC_CONVERT_GENTYPE(n);

  // r = x - n*ln(2)/64, subtracting the head part first and the tail second.
  const __CLC_GENTYPE r_head = __clc_fma(-ln2_by_64_head, fn, x);
  const __CLC_GENTYPE r = __clc_fma(-ln2_by_64_tail, fn, r_head);

  return __clc_exp_helper(x, lower_bound, upper_bound, r, n);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision exp(x): evaluate the float overload and convert back.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp(__CLC_GENTYPE x) {
  __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_exp(__CLC_CONVERT_FLOATN(x)));
  return narrowed;
}
|
||||
|
||||
#endif
|
20
libclc/clc/lib/generic/math/clc_exp2.cl
Normal file
20
libclc/clc/lib/generic/math/clc_exp2.cl
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_exp_helper.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/clc_rint.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
|
||||
#define __CLC_BODY <clc_exp2.inc>
|
||||
#include <clc/math/gentype.inc>
|
68
libclc/clc/lib/generic/math/clc_exp2.inc
Normal file
68
libclc/clc/lib/generic/math/clc_exp2.inc
Normal file
@ -0,0 +1,68 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision exp2(x).
//
// x is split into its nearest integer p and a fractional remainder; the
// remainder is multiplied by ln(2) (head/tail pair) so the same e^t
// approximation as __clc_exp can be used, and the result is scaled by 2^p via
// the exponent bits. Inputs below -126 yield 0, inputs >= 128 yield +inf and
// NaN is returned unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
  // Reduce x
  const __CLC_GENTYPE ln2HI = 0x1.62e300p-1f;
  const __CLC_GENTYPE ln2LO = 0x1.2fefa2p-17f;

  __CLC_GENTYPE x_int = __clc_rint(x);
  __CLC_INTN p = __CLC_CONVERT_INTN(x_int);
  __CLC_GENTYPE x_frac = x - x_int;
  __CLC_GENTYPE hi = x_frac * ln2HI;
  __CLC_GENTYPE lo = x_frac * ln2LO;

  // Evaluate poly
  __CLC_GENTYPE t = hi + lo;
  __CLC_GENTYPE tt = t * t;
  __CLC_GENTYPE v = __clc_mad(
      tt,
      -__clc_mad(
          tt,
          __clc_mad(tt,
                    __clc_mad(tt,
                              __clc_mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
                              0x1.1566aap-14f),
                    -0x1.6c16c2p-9f),
          0x1.555556p-3f),
      t);

  __CLC_GENTYPE y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);

  // Scale by 2^p. __CLC_AS_GENTYPE is used (rather than __CLC_AS_FLOATN) to
  // match __clc_exp; in this 32-bit branch the two name the same type.
  __CLC_GENTYPE r = __CLC_AS_GENTYPE(__CLC_AS_INTN(y) + (p << 23));

  const __CLC_GENTYPE ulim = 128.0f;
  const __CLC_GENTYPE llim = -126.0f;

  r = x < llim ? 0.0f : r;
  r = x < ulim ? r : __CLC_AS_GENTYPE((__CLC_UINTN)0x7f800000);
  return __clc_isnan(x) ? x : r;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision exp2(x): take n = x*64 as an integer, convert the
// remainder x - n/64 to a natural-log argument and defer to
// __clc_exp_helper.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
  const __CLC_GENTYPE ln2 = 0x1.62e42fefa39efp-1; // ln(2)
  const __CLC_GENTYPE one_by_64 = 1.0 / 64.0;
  // Range bounds forwarded to the helper as x_min / x_max.
  const __CLC_GENTYPE lower_bound = -1074.0;
  const __CLC_GENTYPE upper_bound = 1024.0;

  const __CLC_INTN n = __CLC_CONVERT_INTN(x * 64.0);
  const __CLC_GENTYPE fn = __CLC_CONVERT_GENTYPE(n);
  const __CLC_GENTYPE r = ln2 * __clc_fma(-one_by_64, fn, x);

  return __clc_exp_helper(x, lower_bound, upper_bound, r, n);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision exp2(x): evaluate the float overload and convert back.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_exp2(__CLC_GENTYPE x) {
  __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_exp2(__CLC_CONVERT_FLOATN(x)));
  return narrowed;
}
|
||||
|
||||
#endif
|
20
libclc/clc/lib/generic/math/clc_exp_helper.cl
Normal file
20
libclc/clc/lib/generic/math/clc_exp_helper.cl
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_ldexp.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
|
||||
#define __DOUBLE_ONLY
|
||||
#define __CLC_BODY <clc_exp_helper.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
54
libclc/clc/lib/generic/math/clc_exp_helper.inc
Normal file
54
libclc/clc/lib/generic/math/clc_exp_helper.inc
Normal file
@ -0,0 +1,54 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Shared tail of the double-precision exp-family routines.
//
//   x             original argument (used for the NaN and range checks only)
//   x_min, x_max  inputs below x_min give 0.0, inputs above x_max give +inf
//   r             reduced argument for the Taylor polynomial
//   n             reduction integer: low 6 bits select the 2^(j/64) table
//                 entry, the remaining bits are the power-of-two scale m
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_exp_helper(__CLC_GENTYPE x,
                                                      __CLC_GENTYPE x_min,
                                                      __CLC_GENTYPE x_max,
                                                      __CLC_GENTYPE r,
                                                      __CLC_INTN n) {
  const __CLC_INTN tbl_idx = n & 0x3f;
  const __CLC_INTN m = n >> 6;

  // 6 term tail of Taylor expansion of e^r, built up by Horner steps.
  __CLC_GENTYPE poly =
      __clc_fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7);
  poly = __clc_fma(r, poly, 0x1.5555555555555p-5);
  poly = __clc_fma(r, poly, 0x1.5555555555555p-3);
  poly = __clc_fma(r, poly, 0x1.0000000000000p-1);
  poly = __clc_fma(r, poly, 1.0);
  __CLC_GENTYPE res = r * poly;

  // Combine with the head/tail table entry for this index.
  const __CLC_GENTYPE tbl_head = USE_TABLE(two_to_jby64_ep_tbl_head, tbl_idx);
  const __CLC_GENTYPE tbl_tail = USE_TABLE(two_to_jby64_ep_tbl_tail, tbl_idx);
  res = __clc_fma(tbl_head + tbl_tail, res, tbl_tail) + tbl_head;

  // Very small exponents are scaled in two multiplications by 2^n1 and 2^n2
  // (n1 + n2 == m) instead of a single ldexp.
  const __CLC_INTN is_tiny =
      (m < -1022) || ((m == -1022) && __CLC_CONVERT_INTN(res < 1.0));

  const __CLC_INTN n1 = m >> 2;
  const __CLC_INTN n2 = m - n1;
  __CLC_GENTYPE two_step =
      res * __CLC_AS_GENTYPE((__CLC_CONVERT_LONGN(n1) + 1023) << 52);
  two_step *= __CLC_AS_GENTYPE((__CLC_CONVERT_LONGN(n2) + 1023) << 52);

  res = __clc_ldexp(res, m);
  res = __CLC_CONVERT_LONGN(is_tiny) ? two_step : res;

  // Special cases, applied in this order: NaN, overflow, underflow.
  res = __clc_isnan(x) ? x : res;
  res = x > x_max ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : res;
  res = x < x_min ? 0.0 : res;

  return res;
}
|
20
libclc/clc/lib/generic/math/clc_expm1.cl
Normal file
20
libclc/clc/lib/generic/math/clc_expm1.cl
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_exp_helper.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
|
||||
#define __CLC_BODY <clc_expm1.inc>
|
||||
#include <clc/math/gentype.inc>
|
169
libclc/clc/lib/generic/math/clc_expm1.inc
Normal file
169
libclc/clc/lib/generic/math/clc_expm1.inc
Normal file
@ -0,0 +1,169 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/* Refer to the exp routine for the underlying algorithm */
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision expm1(x) = e^x - 1.
//
// x is reduced against ln(2)/64 into an integer n and a remainder r. The low
// 6 bits of n select a head/tail table entry for 2^(j/64), the remaining bits
// give the power-of-two scale m, and a short Taylor series handles e^r - 1.
// Special cases are applied last: zero, large negative, large positive, NaN.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_expm1(__CLC_GENTYPE x) {
  // 128*log2 : 88.722839111673
  const __CLC_GENTYPE X_MAX = 0x1.62e42ep+6f;
  // -149*log2 : -103.27892990343184
  const __CLC_GENTYPE X_MIN = -0x1.9d1da0p+6f;
  // 64/log2 : 92.332482616893657
  const __CLC_GENTYPE R_64_BY_LOG2 = 0x1.715476p+6f;
  // log2/64 lead: 0.0108032227
  const __CLC_GENTYPE R_LOG2_BY_64_LD = 0x1.620000p-7f;
  // log2/64 tail: 0.0000272020388
  const __CLC_GENTYPE R_LOG2_BY_64_TL = 0x1.c85fdep-16f;

  __CLC_INTN n = __CLC_CONVERT_INTN(x * R_64_BY_LOG2);
  __CLC_GENTYPE fn = __CLC_CONVERT_GENTYPE(n);

  __CLC_INTN j = n & 0x3f;
  __CLC_INTN m = n >> 6;

  // r = x - n*log2/64, subtracting the lead part first and the tail second.
  __CLC_GENTYPE r =
      __clc_mad(fn, -R_LOG2_BY_64_TL, __clc_mad(fn, -R_LOG2_BY_64_LD, x));

  // Truncated Taylor series
  __CLC_GENTYPE z2 = __clc_mad(
      r * r, __clc_mad(r, __clc_mad(r, 0x1.555556p-5f, 0x1.555556p-3f), 0.5f),
      r);

  // m2 = 2^m, built directly from the exponent bits.
  __CLC_GENTYPE m2 = __CLC_AS_GENTYPE((m + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
  __CLC_GENTYPE exp_head = USE_TABLE(exp_tbl_ep_head, j);
  __CLC_GENTYPE exp_tail = USE_TABLE(exp_tbl_ep_tail, j);

  __CLC_GENTYPE two_to_jby64_h = exp_head * m2;
  __CLC_GENTYPE two_to_jby64_t = exp_tail * m2;
  __CLC_GENTYPE two_to_jby64 = two_to_jby64_h + two_to_jby64_t;

  z2 = __clc_mad(z2, two_to_jby64, two_to_jby64_t) + (two_to_jby64_h - 1.0f);

  // Make subnormals work: a zero input is returned as-is.
  z2 = x == 0.f ? x : z2;
  // Large negative inputs (or a very small scale) give -1.
  z2 = x < X_MIN || m < -24 ? -1.0f : z2;
  // Overflow gives +infinity.
  z2 = x > X_MAX ? __CLC_AS_GENTYPE((__CLC_UINTN)PINFBITPATT_SP32) : z2;
  // NaN is propagated unchanged.
  z2 = __clc_isnan(x) ? x : z2;

  return z2;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision expm1(x) = e^x - 1.
//
// Two candidate results are computed unconditionally and one is selected at
// the end:
//   * a direct polynomial in x, used when log(1-1/4) < x < log(1+1/4);
//   * a table-driven reduction against ln(2)/64, with three variants chosen
//     by the power-of-two scale m (m < -7, m < 53, otherwise).
// Inputs above 709.8 give +infinity; inputs below about -37.43 give -1.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_expm1(__CLC_GENTYPE x) {
  const __CLC_GENTYPE overflow_arg = 709.8;
  const __CLC_GENTYPE saturate_arg = -37.42994775023704;
  // 0x3FCC8FF7C79A9A22 = log(1+1/4)
  const __CLC_GENTYPE near_zero_upper = 0.22314355131420976;
  // 0xBFD269621134DB93 = log(1-1/4)
  const __CLC_GENTYPE near_zero_lower = -0.28768207245178096;
  // 0x40571547652b82fe
  const __CLC_GENTYPE sixty_four_by_ln2 = 92.33248261689366;
  // 0x3f862e42fefa0000
  const __CLC_GENTYPE ln2_by_64_head = 0.010830424696223417;
  // 0x3d1cf79abc9e3b39
  const __CLC_GENTYPE ln2_by_64_tail = 2.5728046223276688e-14;

  // First, assume log(1-1/4) < x < log(1+1/4) i.e -0.28768 < x < 0.22314.
  // u is x with the low 24 bits of its bit pattern cleared; v is the rest.
  const __CLC_GENTYPE u =
      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(x) & 0xffffffffff000000UL);
  const __CLC_GENTYPE v = x - u;
  const __CLC_GENTYPE y = u * u * 0.5;
  const __CLC_GENTYPE z = v * (x + u) * 0.5;

  // Polynomial in x, evaluated by Horner steps from the innermost term out.
  __CLC_GENTYPE q =
      __clc_fma(x, 2.4360682937111612e-8, 2.7582184028154370e-7);
  q = __clc_fma(x, q, 2.7558212415361945e-6);
  q = __clc_fma(x, q, 2.4801576918453420e-5);
  q = __clc_fma(x, q, 1.9841269447671544e-4);
  q = __clc_fma(x, q, 1.3888888890687830e-3);
  q = __clc_fma(x, q, 8.3333333334012270e-3);
  q = __clc_fma(x, q, 4.1666666666665560e-2);
  q = __clc_fma(x, q, 1.6666666666666632e-1);
  q *= x * x * x;

  const __CLC_GENTYPE near_zero_big = (u + y) + (q + (v + z));
  __CLC_GENTYPE near_zero = x + (y + (q + z));
  near_zero = y >= 0x1.0p-7 ? near_zero_big : near_zero;

  // Now assume outside interval around 0
  const __CLC_INTN n = __CLC_CONVERT_INTN(x * sixty_four_by_ln2);
  const __CLC_INTN j = n & 0x3f;
  const __CLC_INTN m = n >> 6;

  const __CLC_GENTYPE f1 = USE_TABLE(two_to_jby64_ep_tbl_head, j);
  const __CLC_GENTYPE f2 = USE_TABLE(two_to_jby64_ep_tbl_tail, j);
  const __CLC_GENTYPE f = f1 + f2;

  const __CLC_GENTYPE dn = __CLC_CONVERT_GENTYPE(-n);
  const __CLC_GENTYPE r =
      __clc_fma(dn, ln2_by_64_tail, __clc_fma(dn, ln2_by_64_head, x));

  // Polynomial in the reduced argument r.
  q = __clc_fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03);
  q = __clc_fma(r, q, 4.16666666662260795726e-02);
  q = __clc_fma(r, q, 1.66666666665260878863e-01);
  q = __clc_fma(r, q, 5.00000000000000008883e-01);
  q = __clc_fma(r * r, q, r);

  // 2^m and 2^-m assembled from exponent bits.
  const __CLC_GENTYPE twopm = __CLC_AS_GENTYPE(
      __CLC_CONVERT_LONGN(m + EXPBIAS_DP64) << EXPSHIFTBITS_DP64);
  const __CLC_GENTYPE twopmm = __CLC_AS_GENTYPE(
      __CLC_CONVERT_LONGN(EXPBIAS_DP64 - m) << EXPSHIFTBITS_DP64);

  // Computations for m > 52, including where result is close to Inf
  const __CLC_ULONGN uval =
      __CLC_AS_ULONGN(0x1.0p+1023 * (f1 + (f * q + (f2))));
  const __CLC_INTN e = __CLC_CONVERT_INTN(uval >> EXPSHIFTBITS_DP64) + 1;

  __CLC_GENTYPE zme1024 = __CLC_AS_GENTYPE(
      (__CLC_CONVERT_ULONGN(e) << EXPSHIFTBITS_DP64) | (uval & MANTBITS_DP64));
  zme1024 = __CLC_CONVERT_LONGN(e == 2047)
                ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64)
                : zme1024;

  __CLC_GENTYPE zmg52 = twopm * (f1 + __clc_fma(f, q, f2 - twopmm));
  zmg52 = __CLC_CONVERT_LONGN(m == 1024) ? zme1024 : zmg52;

  // For m < 53
  const __CLC_GENTYPE zml53 =
      twopm * ((f1 - twopmm) + __clc_fma(f1, q, f2 * (1.0 + q)));

  // For m < -7
  const __CLC_GENTYPE zmln7 =
      __clc_fma(twopm, f1 + __clc_fma(f, q, f2), -1.0);

  // Select the candidate; later selects take priority over earlier ones.
  __CLC_GENTYPE result = __CLC_CONVERT_LONGN(m < 53) ? zml53 : zmg52;
  result = __CLC_CONVERT_LONGN(m < -7) ? zmln7 : result;
  result = x > near_zero_lower && x < near_zero_upper ? near_zero : result;
  result = x > overflow_arg
               ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64)
               : result;
  result = x < saturate_arg ? -1.0 : result;

  return result;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision expm1(x): evaluate the float overload and convert back.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_expm1(__CLC_GENTYPE x) {
  __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_expm1(__CLC_CONVERT_FLOATN(x)));
  return narrowed;
}
|
||||
|
||||
#endif
|
@ -25,7 +25,6 @@ subnormal_config.cl
|
||||
../../generic/lib/math/exp.cl
|
||||
../../generic/lib/math/exp10.cl
|
||||
../../generic/lib/math/exp2.cl
|
||||
../../generic/lib/math/exp_helper.cl
|
||||
../../generic/lib/math/expm1.cl
|
||||
../../generic/lib/math/fdim.cl
|
||||
../../generic/lib/math/fmod.cl
|
||||
|
@ -97,7 +97,6 @@ math/cospi.cl
|
||||
math/erf.cl
|
||||
math/erfc.cl
|
||||
math/exp.cl
|
||||
math/exp_helper.cl
|
||||
math/expm1.cl
|
||||
math/exp2.cl
|
||||
math/exp10.cl
|
||||
|
@ -7,77 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/clc_exp.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float exp(float x) {
|
||||
|
||||
// Reduce x
|
||||
const float ln2HI = 0x1.62e300p-1f;
|
||||
const float ln2LO = 0x1.2fefa2p-17f;
|
||||
const float invln2 = 0x1.715476p+0f;
|
||||
|
||||
float fhalF = x < 0.0f ? -0.5f : 0.5f;
|
||||
int p = mad(x, invln2, fhalF);
|
||||
float fp = (float)p;
|
||||
float hi = mad(fp, -ln2HI, x); // t*ln2HI is exact here
|
||||
float lo = -fp*ln2LO;
|
||||
|
||||
// Evaluate poly
|
||||
float t = hi + lo;
|
||||
float tt = t*t;
|
||||
float v = mad(tt,
|
||||
-mad(tt,
|
||||
mad(tt,
|
||||
mad(tt,
|
||||
mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
|
||||
0x1.1566aap-14f),
|
||||
-0x1.6c16c2p-9f),
|
||||
0x1.555556p-3f),
|
||||
t);
|
||||
|
||||
float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
|
||||
|
||||
// Scale by 2^p
|
||||
float r = as_float(as_int(y) + (p << 23));
|
||||
|
||||
const float ulim = 0x1.62e430p+6f; // ln(largest_normal) = 88.72283905206835305366
|
||||
const float llim = -0x1.5d589ep+6f; // ln(smallest_normal) = -87.33654475055310898657
|
||||
|
||||
r = x < llim ? 0.0f : r;
|
||||
r = x < ulim ? r : as_float(0x7f800000);
|
||||
return isnan(x) ? x : r;
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#include "exp_helper.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double exp(double x) {
|
||||
|
||||
const double X_MIN = -0x1.74910d52d3051p+9; // -1075*ln(2)
|
||||
const double X_MAX = 0x1.62e42fefa39efp+9; // 1024*ln(2)
|
||||
const double R_64_BY_LOG2 = 0x1.71547652b82fep+6; // 64/ln(2)
|
||||
const double R_LOG2_BY_64_LD = 0x1.62e42fefa0000p-7; // head ln(2)/64
|
||||
const double R_LOG2_BY_64_TL = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64
|
||||
|
||||
int n = convert_int(x * R_64_BY_LOG2);
|
||||
double r = fma(-R_LOG2_BY_64_TL, (double)n, fma(-R_LOG2_BY_64_LD, (double)n, x));
|
||||
return __clc_exp_helper(x, X_MIN, X_MAX, r, n);
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(exp)
|
||||
|
||||
#endif
|
||||
#define FUNCTION exp
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -7,65 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/clc_exp2.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float exp2(float x) {
|
||||
|
||||
// Reduce x
|
||||
const float ln2HI = 0x1.62e300p-1f;
|
||||
const float ln2LO = 0x1.2fefa2p-17f;
|
||||
|
||||
float t = rint(x);
|
||||
int p = (int)t;
|
||||
float tt = x - t;
|
||||
float hi = tt * ln2HI;
|
||||
float lo = tt * ln2LO;
|
||||
|
||||
// Evaluate poly
|
||||
t = hi + lo;
|
||||
tt = t*t;
|
||||
float v = mad(tt,
|
||||
-mad(tt,
|
||||
mad(tt,
|
||||
mad(tt,
|
||||
mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
|
||||
0x1.1566aap-14f),
|
||||
-0x1.6c16c2p-9f),
|
||||
0x1.555556p-3f),
|
||||
t);
|
||||
|
||||
float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
|
||||
|
||||
// Scale by 2^p
|
||||
float r = as_float(as_int(y) + (p << 23));
|
||||
|
||||
const float ulim = 128.0f;
|
||||
const float llim = -126.0f;
|
||||
|
||||
r = x < llim ? 0.0f : r;
|
||||
r = x < ulim ? r : as_float(0x7f800000);
|
||||
return isnan(x) ? x : r;
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#include "exp_helper.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double exp2(double x) {
|
||||
const double R_LN2 = 0x1.62e42fefa39efp-1; // ln(2)
|
||||
const double R_1_BY_64 = 1.0 / 64.0;
|
||||
|
||||
int n = convert_int(x * 64.0);
|
||||
double r = R_LN2 * fma(-R_1_BY_64, (double)n, x);
|
||||
return __clc_exp_helper(x, -1074.0, 1024.0, r, n);
|
||||
}
|
||||
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double)
|
||||
|
||||
#endif
|
||||
#define FUNCTION exp2
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -1,55 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) {
|
||||
|
||||
int j = n & 0x3f;
|
||||
int m = n >> 6;
|
||||
|
||||
// 6 term tail of Taylor expansion of e^r
|
||||
double z2 = r * fma(r,
|
||||
fma(r,
|
||||
fma(r,
|
||||
fma(r,
|
||||
fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
|
||||
0x1.5555555555555p-5),
|
||||
0x1.5555555555555p-3),
|
||||
0x1.0000000000000p-1),
|
||||
1.0);
|
||||
|
||||
double tv0 = USE_TABLE(two_to_jby64_ep_tbl_head, j);
|
||||
double tv1 = USE_TABLE(two_to_jby64_ep_tbl_tail, j);
|
||||
z2 = fma(tv0 + tv1, z2, tv1) + tv0;
|
||||
|
||||
int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
|
||||
|
||||
int n1 = m >> 2;
|
||||
int n2 = m-n1;
|
||||
double z3= z2 * as_double(((long)n1 + 1023) << 52);
|
||||
z3 *= as_double(((long)n2 + 1023) << 52);
|
||||
|
||||
z2 = ldexp(z2, m);
|
||||
z2 = small_value ? z3: z2;
|
||||
|
||||
z2 = isnan(x) ? x : z2;
|
||||
|
||||
z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2;
|
||||
z2 = x < x_min ? 0.0 : z2;
|
||||
|
||||
return z2;
|
||||
}
|
||||
|
||||
#endif // cl_khr_fp64
|
@ -7,151 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/math/clc_expm1.h>
|
||||
|
||||
/* Refer to the exp routine for the underlying algorithm */
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float expm1(float x) {
|
||||
const float X_MAX = 0x1.62e42ep+6f; // 128*log2 : 88.722839111673
|
||||
const float X_MIN = -0x1.9d1da0p+6f; // -149*log2 : -103.27892990343184
|
||||
|
||||
const float R_64_BY_LOG2 = 0x1.715476p+6f; // 64/log2 : 92.332482616893657
|
||||
const float R_LOG2_BY_64_LD = 0x1.620000p-7f; // log2/64 lead: 0.0108032227
|
||||
const float R_LOG2_BY_64_TL = 0x1.c85fdep-16f; // log2/64 tail: 0.0000272020388
|
||||
|
||||
uint xi = as_uint(x);
|
||||
int n = (int)(x * R_64_BY_LOG2);
|
||||
float fn = (float)n;
|
||||
|
||||
int j = n & 0x3f;
|
||||
int m = n >> 6;
|
||||
|
||||
float r = mad(fn, -R_LOG2_BY_64_TL, mad(fn, -R_LOG2_BY_64_LD, x));
|
||||
|
||||
// Truncated Taylor series
|
||||
float z2 = mad(r*r, mad(r, mad(r, 0x1.555556p-5f, 0x1.555556p-3f), 0.5f), r);
|
||||
|
||||
float m2 = as_float((m + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
|
||||
float exp_head = USE_TABLE(exp_tbl_ep_head, j);
|
||||
float exp_tail = USE_TABLE(exp_tbl_ep_tail, j);
|
||||
|
||||
float two_to_jby64_h = exp_head * m2;
|
||||
float two_to_jby64_t = exp_tail * m2;
|
||||
float two_to_jby64 = two_to_jby64_h + two_to_jby64_t;
|
||||
|
||||
z2 = mad(z2, two_to_jby64, two_to_jby64_t) + (two_to_jby64_h - 1.0f);
|
||||
//Make subnormals work
|
||||
z2 = x == 0.f ? x : z2;
|
||||
z2 = x < X_MIN | m < -24 ? -1.0f : z2;
|
||||
z2 = x > X_MAX ? as_float(PINFBITPATT_SP32) : z2;
|
||||
z2 = isnan(x) ? x : z2;
|
||||
|
||||
return z2;
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, expm1, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#include "exp_helper.h"
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double expm1(double x) {
|
||||
const double max_expm1_arg = 709.8;
|
||||
const double min_expm1_arg = -37.42994775023704;
|
||||
const double log_OnePlus_OneByFour = 0.22314355131420976; //0x3FCC8FF7C79A9A22 = log(1+1/4)
|
||||
const double log_OneMinus_OneByFour = -0.28768207245178096; //0xBFD269621134DB93 = log(1-1/4)
|
||||
const double sixtyfour_by_lnof2 = 92.33248261689366; //0x40571547652b82fe
|
||||
const double lnof2_by_64_head = 0.010830424696223417; //0x3f862e42fefa0000
|
||||
const double lnof2_by_64_tail = 2.5728046223276688e-14; //0x3d1cf79abc9e3b39
|
||||
|
||||
// First, assume log(1-1/4) < x < log(1+1/4) i.e -0.28768 < x < 0.22314
|
||||
double u = as_double(as_ulong(x) & 0xffffffffff000000UL);
|
||||
double v = x - u;
|
||||
double y = u * u * 0.5;
|
||||
double z = v * (x + u) * 0.5;
|
||||
|
||||
double q = fma(x,
|
||||
fma(x,
|
||||
fma(x,
|
||||
fma(x,
|
||||
fma(x,
|
||||
fma(x,
|
||||
fma(x,
|
||||
fma(x,2.4360682937111612e-8, 2.7582184028154370e-7),
|
||||
2.7558212415361945e-6),
|
||||
2.4801576918453420e-5),
|
||||
1.9841269447671544e-4),
|
||||
1.3888888890687830e-3),
|
||||
8.3333333334012270e-3),
|
||||
4.1666666666665560e-2),
|
||||
1.6666666666666632e-1);
|
||||
q *= x * x * x;
|
||||
|
||||
double z1g = (u + y) + (q + (v + z));
|
||||
double z1 = x + (y + (q + z));
|
||||
z1 = y >= 0x1.0p-7 ? z1g : z1;
|
||||
|
||||
// Now assume outside interval around 0
|
||||
int n = (int)(x * sixtyfour_by_lnof2);
|
||||
int j = n & 0x3f;
|
||||
int m = n >> 6;
|
||||
|
||||
double f1 = USE_TABLE(two_to_jby64_ep_tbl_head, j);
|
||||
double f2 = USE_TABLE(two_to_jby64_ep_tbl_tail, j);
|
||||
double f = f1 + f2;
|
||||
|
||||
double dn = -n;
|
||||
double r = fma(dn, lnof2_by_64_tail, fma(dn, lnof2_by_64_head, x));
|
||||
|
||||
q = fma(r,
|
||||
fma(r,
|
||||
fma(r,
|
||||
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
|
||||
4.16666666662260795726e-02),
|
||||
1.66666666665260878863e-01),
|
||||
5.00000000000000008883e-01);
|
||||
q = fma(r*r, q, r);
|
||||
|
||||
double twopm = as_double((long)(m + EXPBIAS_DP64) << EXPSHIFTBITS_DP64);
|
||||
double twopmm = as_double((long)(EXPBIAS_DP64 - m) << EXPSHIFTBITS_DP64);
|
||||
|
||||
// Computations for m > 52, including where result is close to Inf
|
||||
ulong uval = as_ulong(0x1.0p+1023 * (f1 + (f * q + (f2))));
|
||||
int e = (int)(uval >> EXPSHIFTBITS_DP64) + 1;
|
||||
|
||||
double zme1024 = as_double(((long)e << EXPSHIFTBITS_DP64) | (uval & MANTBITS_DP64));
|
||||
zme1024 = e == 2047 ? as_double(PINFBITPATT_DP64) : zme1024;
|
||||
|
||||
double zmg52 = twopm * (f1 + fma(f, q, f2 - twopmm));
|
||||
zmg52 = m == 1024 ? zme1024 : zmg52;
|
||||
|
||||
// For m < 53
|
||||
double zml53 = twopm * ((f1 - twopmm) + fma(f1, q, f2*(1.0 + q)));
|
||||
|
||||
// For m < -7
|
||||
double zmln7 = fma(twopm, f1 + fma(f, q, f2), -1.0);
|
||||
|
||||
z = m < 53 ? zml53 : zmg52;
|
||||
z = m < -7 ? zmln7 : z;
|
||||
z = x > log_OneMinus_OneByFour & x < log_OnePlus_OneByFour ? z1 : z;
|
||||
z = x > max_expm1_arg ? as_double(PINFBITPATT_DP64) : z;
|
||||
z = x < min_expm1_arg ? -1.0 : z;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1)
|
||||
|
||||
#endif
|
||||
#define FUNCTION expm1
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -35,7 +35,6 @@ subnormal_config.cl
|
||||
../../generic/lib/math/erf.cl
|
||||
../../generic/lib/math/erfc.cl
|
||||
../../generic/lib/math/exp.cl
|
||||
../../generic/lib/math/exp_helper.cl
|
||||
../../generic/lib/math/expm1.cl
|
||||
../../generic/lib/math/exp2.cl
|
||||
../../generic/lib/math/exp10.cl
|
||||
|
Loading…
x
Reference in New Issue
Block a user