mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-15 22:06:32 +00:00
[libclc] Move sinh, cosh & tanh to the CLC library (#134063)
This commit also vectorizes the builtins.
This commit is contained in:
parent
d51525ba36
commit
f186041553
20
libclc/clc/include/clc/math/clc_cosh.h
Normal file
20
libclc/clc/include/clc/math/clc_cosh.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Include guard for the __clc_cosh declarations.
#ifndef __CLC_MATH_CLC_COSH_H__
|
||||
#define __CLC_MATH_CLC_COSH_H__
|
||||
|
||||
// Parameterize the shared declaration template: unary_decl.inc is the body
// and __clc_cosh is the function name it declares. gentype.inc is expected to
// expand __CLC_BODY for the supported floating-point types (presumably once
// per type and vector width -- confirm against gentype.inc).
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_cosh
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
// Remove the template parameters so they do not leak into later includes.
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_COSH_H__
|
20
libclc/clc/include/clc/math/clc_sinh.h
Normal file
20
libclc/clc/include/clc/math/clc_sinh.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Include guard for the __clc_sinh declarations.
#ifndef __CLC_MATH_CLC_SINH_H__
|
||||
#define __CLC_MATH_CLC_SINH_H__
|
||||
|
||||
// Parameterize the shared declaration template: unary_decl.inc is the body
// and __clc_sinh is the function name it declares. gentype.inc is expected to
// expand __CLC_BODY for the supported floating-point types (presumably once
// per type and vector width -- confirm against gentype.inc).
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_sinh
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
// Remove the template parameters so they do not leak into later includes.
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_SINH_H__
|
20
libclc/clc/include/clc/math/clc_tanh.h
Normal file
20
libclc/clc/include/clc/math/clc_tanh.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Include guard for the __clc_tanh declarations.
#ifndef __CLC_MATH_CLC_TANH_H__
|
||||
#define __CLC_MATH_CLC_TANH_H__
|
||||
|
||||
// Parameterize the shared declaration template: unary_decl.inc is the body
// and __clc_tanh is the function name it declares. gentype.inc is expected to
// expand __CLC_BODY for the supported floating-point types (presumably once
// per type and vector width -- confirm against gentype.inc).
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_tanh
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
// Remove the template parameters so they do not leak into later includes.
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_TANH_H__
|
@ -62,7 +62,6 @@
|
||||
TABLE_FUNCTION_DECL(float2, log2_tbl);
|
||||
TABLE_FUNCTION_DECL(float2, log10_tbl);
|
||||
TABLE_FUNCTION_DECL(uint4, pibits_tbl);
|
||||
TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
|
||||
|
||||
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
|
||||
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
|
||||
@ -74,6 +73,8 @@ CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_head);
|
||||
CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_tail);
|
||||
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
|
||||
CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
@ -85,8 +86,10 @@ CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
|
||||
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_tail);
|
||||
TABLE_FUNCTION_DECL(double2, sinh_tbl);
|
||||
TABLE_FUNCTION_DECL(double2, cosh_tbl);
|
||||
CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_tail);
|
||||
CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_tail);
|
||||
CLC_TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
|
||||
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_head);
|
||||
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_tail);
|
||||
|
@ -31,6 +31,7 @@ math/clc_atanpi.cl
|
||||
math/clc_cbrt.cl
|
||||
math/clc_ceil.cl
|
||||
math/clc_copysign.cl
|
||||
math/clc_cosh.cl
|
||||
math/clc_cospi.cl
|
||||
math/clc_ep_log.cl
|
||||
math/clc_exp.cl
|
||||
@ -76,10 +77,12 @@ math/clc_rootn.cl
|
||||
math/clc_round.cl
|
||||
math/clc_rsqrt.cl
|
||||
math/clc_sincos_helpers.cl
|
||||
math/clc_sinh.cl
|
||||
math/clc_sinpi.cl
|
||||
math/clc_sqrt.cl
|
||||
math/clc_sw_fma.cl
|
||||
math/clc_tables.cl
|
||||
math/clc_tanh.cl
|
||||
math/clc_tanpi.cl
|
||||
math/clc_tgamma.cl
|
||||
math/clc_trunc.cl
|
||||
|
24
libclc/clc/lib/generic/math/clc_cosh.cl
Normal file
24
libclc/clc/lib/generic/math/clc_cosh.cl
Normal file
@ -0,0 +1,24 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_copysign.h>
|
||||
#include <clc/math/clc_exp.h>
|
||||
#include <clc/math/clc_fabs.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/relational/clc_isinf.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
#include <clc/shared/clc_min.h>
|
||||
|
||||
#define __CLC_BODY <clc_cosh.inc>
|
||||
#include <clc/math/gentype.inc>
|
199
libclc/clc/lib/generic/math/clc_cosh.inc
Normal file
199
libclc/clc/lib/generic/math/clc_cosh.inc
Normal file
@ -0,0 +1,199 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision cosh(x), computed lane-wise for scalar and vector
// __CLC_GENTYPE.
//
// cosh is an even function, so everything is evaluated on y = |x| and the
// result never takes the sign of x. Every candidate result is computed
// unconditionally and the final value is picked with selects:
//   y >= max_cosh_arg    : +Inf (overflow)
//   y >= small_threshold : exp(y)/2, because exp(-y) is negligible there
//   otherwise            : cosh(y0)cosh(dy) + sinh(y0)sinh(dy), with y0 the
//                          integer part of y (table lookup) and dy = y - y0
// NaN inputs return NaN and inputs with |x| < 2^-14 return exactly 1.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
  const __CLC_GENTYPE max_cosh_arg = 0x1.65a9fap+6f;
  const __CLC_GENTYPE small_threshold = 0x1.0a2b24p+3f;

  __CLC_GENTYPE y = __clc_fabs(x);
  __CLC_UINTN aux = __CLC_AS_UINTN(y);

  // Table index: the integer part of y. The unsigned comparison maps every
  // index outside [0, 36] to 0; those lanes are replaced further down by the
  // large-argument or NaN selects, so the table value is never used for them.
  __CLC_INTN ind = __CLC_CONVERT_INTN(y);
  ind = __CLC_CONVERT_UINTN(ind) > 36U ? 0 : ind;

  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
  __CLC_GENTYPE dy2 = dy * dy;

  // sinh(dy) = dy + dy^3 * P(dy^2); P is evaluated with Horner's scheme.
  __CLC_GENTYPE sdy = __clc_mad(dy2, 0.7746188980094184251527126e-12f,
                                0.160576793121939886190847e-9f);
  sdy = __clc_mad(dy2, sdy, 0.250521176994133472333666e-7f);
  sdy = __clc_mad(dy2, sdy, 0.275573191913636406057211e-5f);
  sdy = __clc_mad(dy2, sdy, 0.198412698413242405162014e-3f);
  sdy = __clc_mad(dy2, sdy, 0.833333333333329931873097e-2f);
  sdy = __clc_mad(dy2, sdy, 0.166666666666666667013899e0f);
  sdy = __clc_mad(sdy, dy * dy2, dy);

  // cosh(dy) = 1 + dy^2 * Q(dy^2); Q is evaluated with Horner's scheme.
  __CLC_GENTYPE cdy = __clc_mad(dy2, 0.1163921388172173692062032e-10f,
                                0.208744349831471353536305e-8f);
  cdy = __clc_mad(dy2, cdy, 0.275573350756016588011357e-6f);
  cdy = __clc_mad(dy2, cdy, 0.248015872460622433115785e-4f);
  cdy = __clc_mad(dy2, cdy, 0.138888888889814854814536e-2f);
  cdy = __clc_mad(dy2, cdy, 0.416666666666660876512776e-1f);
  cdy = __clc_mad(dy2, cdy, 0.500000000000000005911074e0f);
  cdy = __clc_mad(cdy, dy2, 1.0f);

  // For float the "head" table stores sinh(y0) and the "tail" table stores
  // cosh(y0), so this is sinh(y0)*sinh(dy) + cosh(y0)*cosh(dy) = cosh(y).
  __CLC_GENTYPE sinhcoshh = USE_TABLE(sinhcosh_tbl_head, ind);
  __CLC_GENTYPE sinhcosht = USE_TABLE(sinhcosh_tbl_tail, ind);
  __CLC_GENTYPE z = __clc_mad(sinhcoshh, sdy, sinhcosht * cdy);

  // When exp(-y) is insignificant compared to exp(y), return exp(y)/2.
  // The halving is folded into the exponent: 0x1.62e500p-1f is slightly above
  // ln(2), and the mad rescales by (1 + 0x1.a0210ep-18f) to make up for the
  // difference.
  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e500p-1f);
  __CLC_GENTYPE zsmall = __clc_mad(0x1.a0210ep-18f, t, t);
  z = y >= small_threshold ? zsmall : z;

  // Corner cases: overflow to +Inf, NaN propagation (bit pattern above +Inf),
  // and |x| < 2^-14 (bit pattern 0x38800000) where cosh(x) rounds to 1.
  z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_UINTN)PINFBITPATT_SP32) : z;
  z = aux > PINFBITPATT_SP32 ? __CLC_GENTYPE_NAN : z;
  z = aux < 0x38800000 ? 1.0f : z;

  return z;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision cosh(x), computed lane-wise for scalar and vector
// __CLC_GENTYPE.
//
// All work is done on y = |x|; cosh is even, so no sign is re-applied. Each
// candidate is computed unconditionally and the result is chosen by selects:
//   x is Inf or NaN      : |x|
//   y >= overflow_arg    : +Inf
//   y >= exp_only_arg    : exp(y)/2 (exp(-y) is negligible)
//   y <  2^-28           : 1
//   otherwise            : cosh(y0)cosh(dy) + sinh(y0)sinh(dy), with y0 the
//                          integer part of y (table lookup) and dy = y - y0
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
  // ln(2^1025) = 0x408633ce8fb9f87e: from here on exp(y)/2 overflows.
  const __CLC_GENTYPE overflow_arg = 7.10475860073943977113e+02;

  // ln(2^27): beyond this exp(-y) is insignificant compared to exp(y).
  const __CLC_GENTYPE exp_only_arg = 0x1.2b708872320e2p+4;

  __CLC_GENTYPE y = __clc_fabs(x);

  // Integer part of y, capped at the last table entry.
  __CLC_INTN idx = __clc_min(__CLC_CONVERT_INTN(y), 36);
  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(idx);
  __CLC_GENTYPE dy2 = dy * dy;

  // Horner evaluation of the sinh series tail: sinh(dy) ~= dy + sdy.
  __CLC_GENTYPE sp = __clc_fma(dy2, 0.7746188980094184251527126e-12,
                               0.160576793121939886190847e-9);
  sp = __clc_fma(dy2, sp, 0.250521176994133472333666e-7);
  sp = __clc_fma(dy2, sp, 0.275573191913636406057211e-5);
  sp = __clc_fma(dy2, sp, 0.198412698413242405162014e-3);
  sp = __clc_fma(dy2, sp, 0.833333333333329931873097e-2);
  sp = __clc_fma(dy2, sp, 0.166666666666666667013899e0);
  __CLC_GENTYPE sdy = dy * dy2 * sp;

  // Horner evaluation of the cosh series tail: cosh(dy) ~= 1 + cdy.
  __CLC_GENTYPE cp = __clc_fma(dy2, 0.1163921388172173692062032e-10,
                               0.208744349831471353536305e-8);
  cp = __clc_fma(dy2, cp, 0.275573350756016588011357e-6);
  cp = __clc_fma(dy2, cp, 0.248015872460622433115785e-4);
  cp = __clc_fma(dy2, cp, 0.138888888889814854814536e-2);
  cp = __clc_fma(dy2, cp, 0.416666666666660876512776e-1);
  cp = __clc_fma(dy2, cp, 0.500000000000000005911074e0);
  __CLC_GENTYPE cdy = dy2 * cp;

  // cosh(y0) and sinh(y0), each stored as a head/tail pair.
  __CLC_GENTYPE cosh_hi = USE_TABLE(cosh_tbl_head, idx);
  __CLC_GENTYPE cosh_lo = USE_TABLE(cosh_tbl_tail, idx);
  __CLC_GENTYPE sinh_hi = USE_TABLE(sinh_tbl_head, idx);
  __CLC_GENTYPE sinh_lo = USE_TABLE(sinh_tbl_tail, idx);

  // Accumulate the angle-addition formula from the smallest terms outwards;
  // the dominant term cosh_hi is added last.
  __CLC_GENTYPE acc = __clc_fma(sinh_lo, sdy, cosh_lo * cdy);
  acc = __clc_fma(sinh_lo, dy, acc) + cosh_lo;
  acc = __clc_fma(cosh_hi, cdy, acc);
  acc = __clc_fma(sinh_hi, sdy, acc);
  __CLC_GENTYPE z = __clc_fma(sinh_hi, dy, acc) + cosh_hi;

  // Tiny arguments: cosh(x) rounds to 1.
  z = y < 0x1.0p-28 ? 1.0 : z;

  // Large arguments: exp(y)/2. The halving is folded into the exponent by
  // subtracting the leading bits of ln(2); the fma corrects for the part of
  // ln(2) that was left out.
  __CLC_GENTYPE half_exp = __clc_exp(y - 0x1.62e42fefa3800p-1);
  half_exp = __clc_fma(half_exp, -0x1.ef35793c76641p-45, half_exp);
  z = y >= exp_only_arg ? half_exp : z;

  // Overflow.
  z = y >= overflow_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;

  // Inf and NaN inputs map to |x|.
  z = __clc_isinf(x) || __clc_isnan(x) ? y : z;

  return z;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision cosh(x): the argument is converted with
// __CLC_CONVERT_FLOATN (presumably to float of the same vector width), the
// __clc_cosh overload for that type is evaluated, and the result is converted
// back to __CLC_GENTYPE.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_cosh(__CLC_CONVERT_FLOATN(x)));
|
||||
}
|
||||
|
||||
#endif
|
23
libclc/clc/lib/generic/math/clc_sinh.cl
Normal file
23
libclc/clc/lib/generic/math/clc_sinh.cl
Normal file
@ -0,0 +1,23 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_copysign.h>
|
||||
#include <clc/math/clc_exp.h>
|
||||
#include <clc/math/clc_fabs.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/relational/clc_isinf.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
#include <clc/shared/clc_min.h>
|
||||
|
||||
#define __CLC_BODY <clc_sinh.inc>
|
||||
#include <clc/math/gentype.inc>
|
201
libclc/clc/lib/generic/math/clc_sinh.inc
Normal file
201
libclc/clc/lib/generic/math/clc_sinh.inc
Normal file
@ -0,0 +1,201 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision sinh(x), computed lane-wise for scalar and vector
// __CLC_GENTYPE.
//
// sinh is odd: the magnitude is computed from y = |x| and the sign bit of x
// is OR-ed back into the result. Each candidate is computed unconditionally
// and the result is chosen by selects:
//   x is NaN or |x| < 2^-14 : x itself
//   y >= overflow_arg       : sign(x) * Inf
//   y >= exp_only_arg       : sign(x) * exp(y)/2 (exp(-y) is negligible)
//   otherwise               : sign(x) * (sinh(y0)cosh(dy) + cosh(y0)sinh(dy)),
//                             with y0 the integer part of y (table lookup)
//                             and dy = y - y0
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
  const __CLC_GENTYPE overflow_arg = 0x1.65a9fap+6f;
  const __CLC_GENTYPE exp_only_arg = 0x1.0a2b24p+3f;

  __CLC_UINTN x_bits = __CLC_AS_UINTN(x);
  __CLC_GENTYPE y = __clc_fabs(x);
  __CLC_UINTN abs_bits = __CLC_AS_UINTN(y);
  // x and |x| differ only in the sign bit, so the XOR isolates it.
  __CLC_UINTN sign_bit = x_bits ^ abs_bits;

  // Table index: the integer part of y. The unsigned comparison maps every
  // index outside [0, 36] to 0.
  __CLC_INTN idx = __CLC_CONVERT_INTN(y);
  idx = __CLC_CONVERT_UINTN(idx) > 36U ? 0 : idx;

  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(idx);
  __CLC_GENTYPE dy2 = dy * dy;

  // sinh(dy) = dy + dy^3 * P(dy^2); P is evaluated with Horner's scheme.
  __CLC_GENTYPE sinh_dy = __clc_mad(dy2, 0.7746188980094184251527126e-12f,
                                    0.160576793121939886190847e-9f);
  sinh_dy = __clc_mad(dy2, sinh_dy, 0.250521176994133472333666e-7f);
  sinh_dy = __clc_mad(dy2, sinh_dy, 0.275573191913636406057211e-5f);
  sinh_dy = __clc_mad(dy2, sinh_dy, 0.198412698413242405162014e-3f);
  sinh_dy = __clc_mad(dy2, sinh_dy, 0.833333333333329931873097e-2f);
  sinh_dy = __clc_mad(dy2, sinh_dy, 0.166666666666666667013899e0f);
  sinh_dy = __clc_mad(sinh_dy, dy * dy2, dy);

  // cosh(dy) = 1 + dy^2 * Q(dy^2); Q is evaluated with Horner's scheme.
  __CLC_GENTYPE cosh_dy = __clc_mad(dy2, 0.1163921388172173692062032e-10f,
                                    0.208744349831471353536305e-8f);
  cosh_dy = __clc_mad(dy2, cosh_dy, 0.275573350756016588011357e-6f);
  cosh_dy = __clc_mad(dy2, cosh_dy, 0.248015872460622433115785e-4f);
  cosh_dy = __clc_mad(dy2, cosh_dy, 0.138888888889814854814536e-2f);
  cosh_dy = __clc_mad(dy2, cosh_dy, 0.416666666666660876512776e-1f);
  cosh_dy = __clc_mad(dy2, cosh_dy, 0.500000000000000005911074e0f);
  cosh_dy = __clc_mad(cosh_dy, dy2, 1.0f);

  // For float the "head" table stores sinh(y0) and the "tail" table stores
  // cosh(y0), so this is cosh(y0)*sinh(dy) + sinh(y0)*cosh(dy) = sinh(y).
  __CLC_GENTYPE sinh_y0 = USE_TABLE(sinhcosh_tbl_head, idx);
  __CLC_GENTYPE cosh_y0 = USE_TABLE(sinhcosh_tbl_tail, idx);
  __CLC_GENTYPE z = __clc_mad(cosh_y0, sinh_dy, sinh_y0 * cosh_dy);
  z = __CLC_AS_GENTYPE(sign_bit | __CLC_AS_UINTN(z));

  // Large y: the negative exponential is negligible, so sinh(x) is
  // sign(x)*exp(y)/2. The halving is folded into the exponent:
  // 0x1.62e500p-1f is slightly above ln(2), and the mad rescales by
  // (1 + 0x1.a0210ep-18f) to make up for the difference.
  __CLC_GENTYPE half_exp = __clc_exp(y - 0x1.62e500p-1f);
  __CLC_GENTYPE z_large = __clc_mad(0x1.a0210ep-18f, half_exp, half_exp);
  z_large = __CLC_AS_GENTYPE(sign_bit | __CLC_AS_UINTN(z_large));
  z = y >= exp_only_arg ? z_large : z;

  // Overflow: infinity carrying the sign of x.
  __CLC_GENTYPE signed_inf = __CLC_AS_GENTYPE(PINFBITPATT_SP32 | sign_bit);
  z = y >= overflow_arg ? signed_inf : z;

  // NaN (bit pattern above +Inf) and |x| < 2^-14 (bit pattern 0x38800000):
  // return the input unchanged.
  z = abs_bits > PINFBITPATT_SP32 || abs_bits < 0x38800000U ? x : z;

  return z;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision sinh(x), computed lane-wise for scalar and vector
// __CLC_GENTYPE.
//
// sinh is odd: the magnitude is computed from y = |x| and the sign of x is
// restored with __clc_copysign at the end. Each candidate is computed
// unconditionally and the result is chosen by selects:
//   y >= overflow_arg            : Inf
//   y >= exp_only_arg            : exp(y)/2 (exp(-y) is negligible)
//   y < 2^-28, or x is NaN / Inf : y
//   otherwise                    : sinh(y0)cosh(dy) + cosh(y0)sinh(dy), with
//                                  y0 the integer part of y (table lookup)
//                                  and dy = y - y0
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
  // Bit pattern 0x408633ce8fb9f87e.
  const __CLC_GENTYPE overflow_arg = 7.10475860073943977113e+02;

  // ln(2^27): beyond this exp(-y) is insignificant compared to exp(y).
  const __CLC_GENTYPE exp_only_arg = 0x1.2b708872320e2p+4;

  __CLC_GENTYPE y = __clc_fabs(x);

  // Integer part of y, capped at the last table entry.
  __CLC_INTN idx = __clc_min(__CLC_CONVERT_INTN(y), 36);
  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(idx);
  __CLC_GENTYPE dy2 = dy * dy;

  // Horner evaluation of the sinh series tail: sinh(dy) ~= dy + sdy.
  __CLC_GENTYPE sp = __clc_fma(dy2, 0.7746188980094184251527126e-12,
                               0.160576793121939886190847e-9);
  sp = __clc_fma(dy2, sp, 0.250521176994133472333666e-7);
  sp = __clc_fma(dy2, sp, 0.275573191913636406057211e-5);
  sp = __clc_fma(dy2, sp, 0.198412698413242405162014e-3);
  sp = __clc_fma(dy2, sp, 0.833333333333329931873097e-2);
  sp = __clc_fma(dy2, sp, 0.166666666666666667013899e0);
  __CLC_GENTYPE sdy = dy * dy2 * sp;

  // Horner evaluation of the cosh series tail: cosh(dy) ~= 1 + cdy.
  __CLC_GENTYPE cp = __clc_fma(dy2, 0.1163921388172173692062032e-10,
                               0.208744349831471353536305e-8);
  cp = __clc_fma(dy2, cp, 0.275573350756016588011357e-6);
  cp = __clc_fma(dy2, cp, 0.248015872460622433115785e-4);
  cp = __clc_fma(dy2, cp, 0.138888888889814854814536e-2);
  cp = __clc_fma(dy2, cp, 0.416666666666660876512776e-1);
  cp = __clc_fma(dy2, cp, 0.500000000000000005911074e0);
  __CLC_GENTYPE cdy = dy2 * cp;

  // Shift some significant bits from dy to sdy: dy_head keeps dy with its low
  // 27 bits masked off, and the rest of dy is folded into sdy_rest.
  __CLC_GENTYPE dy_head =
      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(dy) & 0xfffffffff8000000UL);
  __CLC_GENTYPE sdy_rest = sdy + (dy - dy_head);

  // cosh(y0) and sinh(y0), each stored as a head/tail pair.
  __CLC_GENTYPE cosh_hi = USE_TABLE(cosh_tbl_head, idx);
  __CLC_GENTYPE cosh_lo = USE_TABLE(cosh_tbl_tail, idx);
  __CLC_GENTYPE sinh_hi = USE_TABLE(sinh_tbl_head, idx);
  __CLC_GENTYPE sinh_lo = USE_TABLE(sinh_tbl_tail, idx);

  // Accumulate the angle-addition formula from the smallest terms outwards;
  // the dominant term sinh_hi is added last.
  __CLC_GENTYPE acc = __clc_fma(sinh_lo, cdy, cosh_lo * sdy_rest);
  acc = __clc_fma(cosh_lo, dy_head, acc) + sinh_lo;
  acc = __clc_fma(cosh_hi, sdy_rest, acc);
  acc = __clc_fma(sinh_hi, cdy, acc);
  __CLC_GENTYPE z = __clc_fma(cosh_hi, dy_head, acc) + sinh_hi;

  // Tiny, NaN and Inf inputs: the magnitude is y itself.
  z = (y < 0x1.0p-28) || __clc_isnan(x) || __clc_isinf(x) ? y : z;

  // Large arguments: exp(y)/2. The halving is folded into the exponent by
  // subtracting the leading bits of ln(2); the fma corrects for the part of
  // ln(2) that was left out.
  __CLC_GENTYPE half_exp = __clc_exp(y - 0x1.62e42fefa3800p-1);
  half_exp = __clc_fma(half_exp, -0x1.ef35793c76641p-45, half_exp);
  z = y >= exp_only_arg ? half_exp : z;

  // Overflow.
  z = y >= overflow_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;

  return __clc_copysign(z, x);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision sinh(x): the argument is converted with
// __CLC_CONVERT_FLOATN (presumably to float of the same vector width), the
// __clc_sinh overload for that type is evaluated, and the result is converted
// back to __CLC_GENTYPE.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_sinh(__CLC_CONVERT_FLOATN(x)));
|
||||
}
|
||||
|
||||
#endif
|
@ -339,6 +339,37 @@ DECLARE_TABLE(float, CBRT_TBL_TAIL, 129) = {
|
||||
|
||||
CLC_TABLE_FUNCTION(float, CBRT_TBL_TAIL, cbrt_tbl_tail);
|
||||
|
||||
// Tabulated values of sinh(i) and cosh(i) for i = 0,...,36.
// Note on naming: in the single-precision tables "head" and "tail" are not a
// high/low split of one value. SINHCOSH_TBL_HEAD holds sinh(i) (entry 0 is 0)
// and SINHCOSH_TBL_TAIL holds cosh(i) (entry 0 is 1).
|
||||
DECLARE_TABLE(float, SINHCOSH_TBL_HEAD, 37) = {
|
||||
0x0.000000p+0f, 0x1.2cd9fcp+0f, 0x1.d03cf6p+1f, 0x1.40926ep+3f,
|
||||
0x1.b4a380p+4f, 0x1.28d016p+6f, 0x1.936d22p+7f, 0x1.122876p+9f,
|
||||
0x1.749ea6p+10f, 0x1.fa7158p+11f, 0x1.5829dcp+13f, 0x1.d3c448p+14f,
|
||||
0x1.3de166p+16f, 0x1.b00b5ap+17f, 0x1.259ac4p+19f, 0x1.8f0ccap+20f,
|
||||
0x1.0f2ebep+22f, 0x1.709348p+23f, 0x1.f4f220p+24f, 0x1.546d90p+26f,
|
||||
0x1.ceb088p+27f, 0x1.3a6e20p+29f, 0x1.ab5adcp+30f, 0x1.226af4p+32f,
|
||||
0x1.8ab7fcp+33f, 0x1.0c3d3ap+35f, 0x1.6c9326p+36f, 0x1.ef8230p+37f,
|
||||
0x1.50bba4p+39f, 0x1.c9aae4p+40f, 0x1.370470p+42f, 0x1.a6b766p+43f,
|
||||
0x1.1f43fcp+45f, 0x1.866f34p+46f, 0x1.0953e2p+48f, 0x1.689e22p+49f,
|
||||
0x1.ea215ap+50f,
|
||||
};
|
||||
|
||||
// Accessor used by the float sinh/cosh code as USE_TABLE(sinhcosh_tbl_head, i).
CLC_TABLE_FUNCTION(float, SINHCOSH_TBL_HEAD, sinhcosh_tbl_head);
|
||||
|
||||
// Single-precision cosh(i) for i = 0,...,36 (entry 0 is cosh(0) = 1). Despite
// the "tail" name this is not a low-order correction term: the float sinh/cosh
// code multiplies these entries as cosh(y0).
DECLARE_TABLE(float, SINHCOSH_TBL_TAIL, 37) = {
|
||||
0x1.000000p+0f, 0x1.8b0756p+0f, 0x1.e18fa0p+1f, 0x1.422a4ap+3f,
|
||||
0x1.b4ee86p+4f, 0x1.28d6fcp+6f, 0x1.936e68p+7f, 0x1.122894p+9f,
|
||||
0x1.749eaap+10f, 0x1.fa7158p+11f, 0x1.5829dep+13f, 0x1.d3c448p+14f,
|
||||
0x1.3de166p+16f, 0x1.b00b5ap+17f, 0x1.259ac4p+19f, 0x1.8f0ccap+20f,
|
||||
0x1.0f2ebep+22f, 0x1.709348p+23f, 0x1.f4f220p+24f, 0x1.546d90p+26f,
|
||||
0x1.ceb088p+27f, 0x1.3a6e20p+29f, 0x1.ab5adcp+30f, 0x1.226af4p+32f,
|
||||
0x1.8ab7fcp+33f, 0x1.0c3d3ap+35f, 0x1.6c9326p+36f, 0x1.ef8230p+37f,
|
||||
0x1.50bba4p+39f, 0x1.c9aae4p+40f, 0x1.370470p+42f, 0x1.a6b766p+43f,
|
||||
0x1.1f43fcp+45f, 0x1.866f34p+46f, 0x1.0953e2p+48f, 0x1.689e22p+49f,
|
||||
0x1.ea215ap+50f,
|
||||
};
|
||||
|
||||
// Accessor used by the float sinh/cosh code as USE_TABLE(sinhcosh_tbl_tail, i).
CLC_TABLE_FUNCTION(float, SINHCOSH_TBL_TAIL, sinhcosh_tbl_tail);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
@ -1279,4 +1310,73 @@ DECLARE_TABLE(double, CBRT_REM_TBL_TAIL, 5) = {
|
||||
|
||||
CLC_TABLE_FUNCTION(double, CBRT_REM_TBL_TAIL, cbrt_rem_tbl_tail);
|
||||
|
||||
// Leading part of sinh(i) for i = 0,...,36 in double precision. Every entry
// has its low mantissa bits zero; the remainder is stored in SINH_TBL_TAIL
// (presumably sinh(i) ~= head[i] + tail[i] -- confirm against the generator).
DECLARE_TABLE(double, SINH_TBL_HEAD, 37) = {
|
||||
0x0.0000000000000p+0, 0x1.2cd9fc0000000p+0, 0x1.d03cf60000000p+1,
|
||||
0x1.40926e0000000p+3, 0x1.b4a3800000000p+4, 0x1.28d0160000000p+6,
|
||||
0x1.936d228000000p+7, 0x1.1228768000000p+9, 0x1.749ea50000000p+10,
|
||||
0x1.fa71570000000p+11, 0x1.5829dc8000000p+13, 0x1.d3c4488000000p+14,
|
||||
0x1.3de1650000000p+16, 0x1.b00b590000000p+17, 0x1.259ac48000000p+19,
|
||||
0x1.8f0cca8000000p+20, 0x1.0f2ebd0000000p+22, 0x1.7093488000000p+23,
|
||||
0x1.f4f2208000000p+24, 0x1.546d8f8000000p+26, 0x1.ceb0888000000p+27,
|
||||
0x1.3a6e1f8000000p+29, 0x1.ab5adb8000000p+30, 0x1.226af30000000p+32,
|
||||
0x1.8ab7fb0000000p+33, 0x1.0c3d390000000p+35, 0x1.6c93268000000p+36,
|
||||
0x1.ef822f0000000p+37, 0x1.50bba30000000p+39, 0x1.c9aae40000000p+40,
|
||||
0x1.3704708000000p+42, 0x1.a6b7658000000p+43, 0x1.1f43fc8000000p+45,
|
||||
0x1.866f348000000p+46, 0x1.0953e28000000p+48, 0x1.689e220000000p+49,
|
||||
0x1.ea215a0000000p+50,
|
||||
};
|
||||
|
||||
// Low-order part of sinh(i) for i = 0,...,36, paired index-for-index with
// SINH_TBL_HEAD (presumably the remainder sinh(i) - head[i] -- confirm against
// the generator).
DECLARE_TABLE(double, SINH_TBL_TAIL, 37) = {
|
||||
0x0.0000000000000p+0, 0x1.13ae6096a0092p-26, 0x1.db70cfb79a640p-26,
|
||||
0x1.c2526b66dc067p-23, 0x1.b81b18647f380p-23, 0x1.bc1cdd1e1eb08p-20,
|
||||
0x1.d9f201534fb09p-19, 0x1.d1c064a4e9954p-18, 0x1.4eca65d06ea74p-18,
|
||||
0x1.0c259bcc0ecc5p-15, 0x1.b5a6647cf9016p-13, 0x1.9691adefb0870p-15,
|
||||
0x1.3410fc29cde38p-10, 0x1.6a31a50b6fb3cp-11, 0x1.7defc71805c40p-10,
|
||||
0x1.eb49fd80e0babp-6, 0x1.4fffc7bcd5920p-7, 0x1.03a93b6c63435p-3,
|
||||
0x1.1940bb255fd1cp-4, 0x1.ed26e14260b50p-2, 0x1.b47401fc9f2a2p+0,
|
||||
0x1.67bb3f55634f1p+3, 0x1.c435ff8194ddcp+2, 0x1.d8fee052ba63ap+5,
|
||||
0x1.51d7edccde3f6p+7, 0x1.04b1644557d1ap+8, 0x1.6a6b5ca0a9dc4p+8,
|
||||
0x1.fd9cc72249abap+11, 0x1.e58de693edab5p+13, 0x1.8c70158ac6363p+14,
|
||||
0x1.7614764f43e20p+15, 0x1.6337db36fc718p+17, 0x1.12d98b1f611e2p+19,
|
||||
0x1.392bc108b37ccp+19, 0x1.ce87bdc3473dcp+22, 0x1.bc8d5ae99ad14p+21,
|
||||
0x1.d20d76744835cp+22,
|
||||
};
|
||||
|
||||
// Leading part of cosh(i) for i = 0,...,36 in double precision. Every entry
// has its low mantissa bits zero; the remainder is stored in COSH_TBL_TAIL
// (presumably cosh(i) ~= head[i] + tail[i] -- confirm against the generator).
DECLARE_TABLE(double, COSH_TBL_HEAD, 37) = {
|
||||
0x1.0000000000000p+0, 0x1.8b07550000000p+0, 0x1.e18fa08000000p+1,
|
||||
0x1.422a490000000p+3, 0x1.b4ee858000000p+4, 0x1.28d6fc8000000p+6,
|
||||
0x1.936e678000000p+7, 0x1.1228948000000p+9, 0x1.749eaa8000000p+10,
|
||||
0x1.fa71580000000p+11, 0x1.5829dd0000000p+13, 0x1.d3c4488000000p+14,
|
||||
0x1.3de1650000000p+16, 0x1.b00b590000000p+17, 0x1.259ac48000000p+19,
|
||||
0x1.8f0cca8000000p+20, 0x1.0f2ebd0000000p+22, 0x1.7093488000000p+23,
|
||||
0x1.f4f2208000000p+24, 0x1.546d8f8000000p+26, 0x1.ceb0888000000p+27,
|
||||
0x1.3a6e1f8000000p+29, 0x1.ab5adb8000000p+30, 0x1.226af30000000p+32,
|
||||
0x1.8ab7fb0000000p+33, 0x1.0c3d390000000p+35, 0x1.6c93268000000p+36,
|
||||
0x1.ef822f0000000p+37, 0x1.50bba30000000p+39, 0x1.c9aae40000000p+40,
|
||||
0x1.3704708000000p+42, 0x1.a6b7658000000p+43, 0x1.1f43fc8000000p+45,
|
||||
0x1.866f348000000p+46, 0x1.0953e28000000p+48, 0x1.689e220000000p+49,
|
||||
0x1.ea215a0000000p+50,
|
||||
};
|
||||
|
||||
// Low-order part of cosh(i) for i = 0,...,36, paired index-for-index with
// COSH_TBL_HEAD (presumably the remainder cosh(i) - head[i] -- confirm against
// the generator).
DECLARE_TABLE(double, COSH_TBL_TAIL, 37) = {
|
||||
0x0.0000000000000p+0, 0x1.d9f5504c2bd28p-28, 0x1.7cb66f0a4c9fdp-25,
|
||||
0x1.f58617928e588p-23, 0x1.bc7d000c38d48p-25, 0x1.f7f9d4e329998p-21,
|
||||
0x1.6e6e464885269p-19, 0x1.ba3a8b946c154p-19, 0x1.3f4e76110d5a4p-18,
|
||||
0x1.17622515a3e2bp-15, 0x1.4dc4b528af3d0p-17, 0x1.1156278615e10p-14,
|
||||
0x1.35ad50ed821f5p-10, 0x1.6b61055f2935cp-11, 0x1.7e2794a601240p-10,
|
||||
0x1.eb4b45f6aadd3p-6, 0x1.5000b967b3698p-7, 0x1.03a940fadc092p-3,
|
||||
0x1.1940bf3bf874cp-4, 0x1.ed26e1a2a2110p-2, 0x1.b4740205796d6p+0,
|
||||
0x1.67bb3f55cb85dp+3, 0x1.c435ff81e18acp+2, 0x1.d8fee052bdea4p+5,
|
||||
0x1.51d7edccde926p+7, 0x1.04b1644557e0ep+8, 0x1.6a6b5ca0a9e1cp+8,
|
||||
0x1.fd9cc72249abep+11, 0x1.e58de693edab5p+13, 0x1.8c70158ac6364p+14,
|
||||
0x1.7614764f43e20p+15, 0x1.6337db36fc718p+17, 0x1.12d98b1f611e2p+19,
|
||||
0x1.392bc108b37ccp+19, 0x1.ce87bdc3473dcp+22, 0x1.bc8d5ae99ad14p+21,
|
||||
0x1.d20d76744835cp+22,
|
||||
};
|
||||
|
||||
// Accessors for the four double-precision tables; the double sinh/cosh code
// reads them as USE_TABLE(sinh_tbl_head, i), USE_TABLE(cosh_tbl_tail, i), etc.
CLC_TABLE_FUNCTION(double, SINH_TBL_HEAD, sinh_tbl_head);
|
||||
CLC_TABLE_FUNCTION(double, SINH_TBL_TAIL, sinh_tbl_tail);
|
||||
CLC_TABLE_FUNCTION(double, COSH_TBL_HEAD, cosh_tbl_head);
|
||||
CLC_TABLE_FUNCTION(double, COSH_TBL_TAIL, cosh_tbl_tail);
|
||||
|
||||
#endif // cl_khr_fp64
|
||||
|
21
libclc/clc/lib/generic/math/clc_tanh.cl
Normal file
21
libclc/clc/lib/generic/math/clc_tanh.cl
Normal file
@ -0,0 +1,21 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_exp.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/relational/clc_isinf.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
#include <clc/shared/clc_min.h>
|
||||
|
||||
#define __CLC_BODY <clc_tanh.inc>
|
||||
#include <clc/math/gentype.inc>
|
137
libclc/clc/lib/generic/math/clc_tanh.inc
Normal file
137
libclc/clc/lib/generic/math/clc_tanh.inc
Normal file
@ -0,0 +1,137 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision tanh(x), computed lane-wise for scalar and vector
// __CLC_GENTYPE.
//
// tanh(x) = sinh(x)/cosh(x) can be written in several equivalent forms, e.g.
//   (exp(x) - exp(-x)) / (exp(x) + exp(-x))
//   1 - 2 / (exp(2x) + 1)
//   (exp(2x) - 1) / (exp(2x) + 1)
// and different ranges favour different evaluations. tanh is odd, so the
// magnitude is computed from y = |x| and the sign bit of x is OR-ed back in:
//   x is NaN or |x| < 2^-13 : x itself
//   y > large_threshold     : sign(x) * 1
//   y > 1                   : sign(x) * (1 - 2 / (exp(2y) + 1))
//   y <= 1                  : sign(x) * (y + y^3 * A(y^2) / B(y^2)), with one
//                             coefficient set for y < 0.9 and another above
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
  const __CLC_GENTYPE large_threshold = 0x1.0a2b24p+3f;

  __CLC_UINTN x_bits = __CLC_AS_UINTN(x);
  __CLC_UINTN abs_bits = x_bits & EXSIGNBIT_SP32;
  // Whatever the mask removed is the sign bit.
  __CLC_UINTN sign_bit = x_bits ^ abs_bits;

  __CLC_GENTYPE y = __CLC_AS_GENTYPE(abs_bits);
  __CLC_GENTYPE y2 = y * y;

  // Numerator and denominator polynomials for y < 0.9.
  __CLC_GENTYPE num_lo =
      __clc_mad(y2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F);
  num_lo = __clc_mad(y2, num_lo, -0.28192806108402678e0F);
  __CLC_GENTYPE den_lo =
      __clc_mad(y2, 0.3427017942262751343e0F, 0.845784192581041099e0F);

  // Numerator and denominator polynomials for 0.9 <= y <= 1.
  __CLC_GENTYPE num_hi =
      __clc_mad(y2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F);
  num_hi = __clc_mad(y2, num_hi, -0.24069858695196524e0F);
  __CLC_GENTYPE den_hi =
      __clc_mad(y2, 0.292529068698052819e0F, 0.72209738473684982e0F);

  __CLC_INTN below_0p9 = y < 0.9f;
  __CLC_GENTYPE num = below_0p9 ? num_lo : num_hi;
  __CLC_GENTYPE den = below_0p9 ? den_lo : den_hi;
  __CLC_GENTYPE z_rational = __clc_mad(MATH_DIVIDE(num, den), y * y2, y);

  // y > 1: 1 - 2 / (exp(2y) + 1).
  __CLC_GENTYPE exp2y_plus_1 = __clc_exp(2.0f * y) + 1.0f;
  __CLC_GENTYPE z_exp = 1.0F - MATH_DIVIDE(2.0F, exp2y_plus_1);

  __CLC_GENTYPE z = y <= 1.0f ? z_rational : z_exp;
  z = __CLC_AS_GENTYPE(sign_bit | __CLC_AS_UINTN(z));

  // Edge cases: saturate to +/-1 (0x3f800000 is 1.0f) for large |x|; return x
  // unchanged when |x| < 2^-13 (bit pattern 0x39000000) or x is NaN (bit
  // pattern above 0x7f800000).
  __CLC_GENTYPE signed_one = __CLC_AS_GENTYPE(0x3f800000U | sign_bit);
  z = y > large_threshold ? signed_one : z;
  z = abs_bits < 0x39000000 || abs_bits > 0x7f800000 ? x : z;

  return z;
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision tanh(x), computed lane-wise for scalar and vector
// __CLC_GENTYPE.
//
// tanh(x) = sinh(x)/cosh(x) can be written in several equivalent forms, e.g.
//   (exp(x) - exp(-x)) / (exp(x) + exp(-x))
//   1 - 2 / (exp(2x) + 1)
//   (exp(2x) - 1) / (exp(2x) + 1)
// and different ranges favour different evaluations. The magnitude is
// computed from y = |x| and the sign bit of x is OR-ed into the result:
//   y > large_threshold   : 1
//   y < 2^-28 or x is NaN : x itself
//   y > 1                 : 1 - 2 / (exp(2y) + 1)
//   y <= 1                : y + y^3 * N(y^2) / D(y^2), with one coefficient
//                           set for y < 0.9 and another for 0.9 <= y <= 1
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
  // ln(2^27): the point at which e^-x is insignificant compared to e^x.
  const __CLC_GENTYPE large_threshold = 0x1.2b708872320e2p+4;

  __CLC_ULONGN x_bits = __CLC_AS_ULONGN(x);
  __CLC_ULONGN abs_bits = x_bits & ~SIGNBIT_DP64;
  // Whatever the mask removed is the sign bit.
  __CLC_ULONGN sign_bit = x_bits ^ abs_bits;
  __CLC_GENTYPE y = __CLC_AS_GENTYPE(abs_bits);
  __CLC_GENTYPE y2 = y * y;

  // y < 0.9: numerator and denominator, Horner form.
  __CLC_GENTYPE num_lo = __clc_fma(y2, -0.142077926378834722618091e-7,
                                   -0.200047621071909498730453e-3);
  num_lo = __clc_fma(y2, num_lo, -0.176016349003044679402273e-1);
  num_lo = __clc_fma(y2, num_lo, -0.274030424656179760118928e0);

  __CLC_GENTYPE den_lo = __clc_fma(y2, 0.2091140262529164482568557e-3,
                                   0.201562166026937652780575e-1);
  den_lo = __clc_fma(y2, den_lo, 0.381641414288328849317962e0);
  den_lo = __clc_fma(y2, den_lo, 0.822091273968539282568011e0);

  // 0.9 <= y <= 1: numerator and denominator, Horner form.
  __CLC_GENTYPE num_hi = __clc_fma(y2, -0.115475878996143396378318e-7,
                                   -0.165597043903549960486816e-3);
  num_hi = __clc_fma(y2, num_hi, -0.146173047288731678404066e-1);
  num_hi = __clc_fma(y2, num_hi, -0.227793870659088295252442e0);

  __CLC_GENTYPE den_hi = __clc_fma(y2, 0.173076050126225961768710e-3,
                                   0.167358775461896562588695e-1);
  den_hi = __clc_fma(y2, den_hi, 0.317204558977294374244770e0);
  den_hi = __clc_fma(y2, den_hi, 0.683381611977295894959554e0);

  __CLC_LONGN below_0p9 = y < 0.9;
  __CLC_GENTYPE num = below_0p9 ? num_lo : num_hi;
  __CLC_GENTYPE den = below_0p9 ? den_lo : den_hi;
  __CLC_GENTYPE z = y + y * y2 * MATH_DIVIDE(num, den);

  // y > 1
  __CLC_GENTYPE exp2y_plus_1 = __clc_exp(2.0 * y) + 1.0;
  __CLC_GENTYPE z_exp = 1.0 - 2.0 / exp2y_plus_1;

  z = y > 1.0 ? z_exp : z;

  // Tiny inputs and NaN (bit pattern above +Inf): return x unchanged.
  z = y < 0x1.0p-28 || abs_bits > PINFBITPATT_DP64 ? x : z;

  // Saturate for large |x|.
  z = y > large_threshold ? 1.0 : z;

  return __CLC_AS_GENTYPE(sign_bit | __CLC_AS_ULONGN(z));
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Overload compiled when __CLC_FPSIZE == 16: converts x with
// __CLC_CONVERT_FLOATN, evaluates __clc_tanh on the converted value, and
// converts the result back with __CLC_CONVERT_GENTYPE.
// NOTE(review): the inner call presumably resolves to the 32-bit overload of
// __clc_tanh - confirm against the definition of __CLC_CONVERT_FLOATN.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_tanh(__CLC_CONVERT_FLOATN(x)));
|
||||
}
|
||||
|
||||
#endif
|
@ -7,179 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/math/clc_cosh.h>
|
||||
|
||||
// Single-precision cosh.
//
// cosh is even, so only the magnitude |x| is used. The magnitude is split
// into an integer part i (used as an index into sinhcosh_tbl, which holds
// sinh(i) in .s0 and cosh(i) in .s1) and a remainder d = |x| - i, and the
// result is reconstructed as
//   cosh(|x|) = cosh(i) * cosh(d) + sinh(i) * sinh(d)
// with sinh(d) and cosh(d) evaluated by polynomials in d^2.
//
// Selects applied afterwards, in this order:
//   |x| >= exp_only_at : exp(|x|) / 2, since exp(-|x|) is insignificant
//   |x| >= overflow_at : +Inf
//   NaN input          : quiet NaN
//   |x| < 2^-14        : 1
_CLC_OVERLOAD _CLC_DEF float cosh(float x) {
  const float overflow_at = 0x1.65a9fap+6f;
  const float exp_only_at = 0x1.0a2b24p+3f;

  // Drop the sign bit to get |x| both as bits and as a float.
  uint mag_bits = as_uint(x) & EXSIGNBIT_SP32;
  float mag = as_float(mag_bits);

  // Table index. Anything that does not land in 0..36 (including negative
  // conversion results) is mapped to 0; such inputs are replaced by one of
  // the selects below anyway.
  int idx = (int)mag;
  idx = (uint)idx > 36U ? 0 : idx;

  float frac = mag - idx;
  float frac_sq = frac * frac;

  // sinh(frac) = frac + frac^3 * poly(frac^2)
  float sinh_poly =
      mad(frac_sq,
          mad(frac_sq,
              mad(frac_sq,
                  mad(frac_sq,
                      mad(frac_sq,
                          mad(frac_sq, 0.7746188980094184251527126e-12f,
                              0.160576793121939886190847e-9f),
                          0.250521176994133472333666e-7f),
                      0.275573191913636406057211e-5f),
                  0.198412698413242405162014e-3f),
              0.833333333333329931873097e-2f),
          0.166666666666666667013899e0f);
  float sinh_frac = mad(sinh_poly, frac * frac_sq, frac);

  // cosh(frac) = 1 + frac^2 * poly(frac^2)
  float cosh_poly =
      mad(frac_sq,
          mad(frac_sq,
              mad(frac_sq,
                  mad(frac_sq,
                      mad(frac_sq,
                          mad(frac_sq, 0.1163921388172173692062032e-10f,
                              0.208744349831471353536305e-8f),
                          0.275573350756016588011357e-6f),
                      0.248015872460622433115785e-4f),
                  0.138888888889814854814536e-2f),
              0.416666666666660876512776e-1f),
          0.500000000000000005911074e0f);
  float cosh_frac = mad(cosh_poly, frac_sq, 1.0f);

  // Combine with the tabulated sinh(idx) / cosh(idx).
  float2 entry = USE_TABLE(sinhcosh_tbl, idx);
  float result = mad(entry.s0, sinh_frac, entry.s1 * cosh_frac);

  // Large-argument path: exp(|x|) / 2, formed as exp(|x| - c) * (1 + delta).
  // NOTE(review): c and delta presumably split ln(2) into a coarse constant
  // plus a small correction - confirm against the derivation.
  float shifted_exp = exp(mag - 0x1.62e500p-1f);
  float exp_only = mad(0x1.a0210ep-18f, shifted_exp, shifted_exp);
  result = mag >= exp_only_at ? exp_only : result;

  // Corner cases; later selects take priority over earlier ones.
  result = mag >= overflow_at ? as_float(PINFBITPATT_SP32) : result;
  result = mag_bits > PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : result;
  result = mag_bits < 0x38800000 ? 1.0f : result;

  return result;
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cosh, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Double-precision cosh.
//
// cosh is even, so the computation runs on y = |x|:
//
//   y < 2^-28            : 1
//   y <  small_threshold : y is split into an integer part y0 (table index)
//                          and dy = y - y0, and
//                            cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
//                          with sinh(y0)/cosh(y0) read from sinh_tbl/cosh_tbl
//                          as (lead, tail) pairs.
//   y >= small_threshold : exp(y) / 2, since exp(-y) is insignificant
//   y >= max_cosh_arg    : +Inf
//   x is Inf or NaN      : |x|
//
// All candidates are computed unconditionally and chosen with selects.
_CLC_OVERLOAD _CLC_DEF double cosh(double x) {
  // This is ln(2^1025)
  const double max_cosh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e

  // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
  const double small_threshold = 0x1.2b708872320e2p+4;

  double y = fabs(x);

  // Table index in 0..36. The clamp is applied in floating point BEFORE the
  // conversion: converting NaN, Inf or a value >= 2^31 to int is out of range
  // and yields an unspecified (possibly negative) value, which an integer
  // min() would let through to the unconditional table lookups below.
  // fmin returns the non-NaN operand, so NaN maps to 36 as well. For every
  // in-range y this is identical to min((int)y, 36).
  int ind = (int)fmin(y, 36.0);
  double dy = y - ind;
  double dy2 = dy * dy;

  // sinh(dy) - dy
  double sdy = dy * dy2 *
      fma(dy2,
          fma(dy2,
              fma(dy2,
                  fma(dy2,
                      fma(dy2,
                          fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),
                          0.250521176994133472333666e-7),
                      0.275573191913636406057211e-5),
                  0.198412698413242405162014e-3),
              0.833333333333329931873097e-2),
          0.166666666666666667013899e0);

  // cosh(dy) - 1
  double cdy = dy2 * fma(dy2,
                         fma(dy2,
                             fma(dy2,
                                 fma(dy2,
                                     fma(dy2,
                                         fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),
                                         0.275573350756016588011357e-6),
                                     0.248015872460622433115785e-4),
                                 0.138888888889814854814536e-2),
                             0.416666666666660876512776e-1),
                         0.500000000000000005911074e0);

  // At this point sinh(dy) is approximated by dy + sdy,
  // and cosh(dy) is approximated by 1 + cdy.
  double2 tv = USE_TABLE(cosh_tbl, ind);
  double cl = tv.s0; // cosh(ind), leading part
  double ct = tv.s1; // cosh(ind), trailing part
  tv = USE_TABLE(sinh_tbl, ind);
  double sl = tv.s0; // sinh(ind), leading part
  double st = tv.s1; // sinh(ind), trailing part

  // (cl + ct)(1 + cdy) + (sl + st)(dy + sdy); cl is added last.
  double z = fma(sl, dy, fma(sl, sdy, fma(cl, cdy, fma(st, dy, fma(st, sdy, ct*cdy)) + ct))) + cl;

  // Other cases
  z = y < 0x1.0p-28 ? 1.0 : z;

  // exp(y) / 2, formed as exp(y - c) with a small multiplicative correction.
  double t = exp(y - 0x1.62e42fefa3800p-1);
  t = fma(t, -0x1.ef35793c76641p-45, t);
  z = y >= small_threshold ? t : z;

  z = y >= max_cosh_arg ? as_double(PINFBITPATT_DP64) : z;

  // Inf -> +Inf, NaN -> NaN.
  z = isinf(x) | isnan(x) ? y : z;

  return z;
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh)
|
||||
|
||||
#endif
|
||||
#define FUNCTION cosh
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -7,178 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/math/clc_sinh.h>
|
||||
|
||||
// Single-precision sinh.
//
// sinh is odd: the sign bit is split off, the magnitude is evaluated, and the
// sign is OR-ed back into each candidate result. The magnitude is split into
// an integer part i (index into sinhcosh_tbl, which holds sinh(i) in .s0 and
// cosh(i) in .s1) and a remainder d = |x| - i, and
//   sinh(|x|) = sinh(i) * cosh(d) + cosh(i) * sinh(d)
// with sinh(d) and cosh(d) evaluated by polynomials in d^2.
//
// Selects applied afterwards, in this order:
//   |x| >= exp_only_at : sign(x) * exp(|x|) / 2
//   |x| >= overflow_at : sign(x) * Inf
//   NaN or |x| < 2^-14 : x itself
_CLC_OVERLOAD _CLC_DEF float sinh(float x) {
  const float overflow_at = 0x1.65a9fap+6f;
  const float exp_only_at = 0x1.0a2b24p+3f;

  // Split the input into sign bit and magnitude.
  uint bits = as_uint(x);
  uint mag_bits = bits & EXSIGNBIT_SP32;
  uint sign_bit = bits ^ mag_bits;
  float mag = as_float(mag_bits);

  // Table index. Anything that does not land in 0..36 (including negative
  // conversion results) is mapped to 0; such inputs are replaced by one of
  // the selects below anyway.
  int idx = (int)mag;
  idx = (uint)idx > 36U ? 0 : idx;

  float frac = mag - idx;
  float frac_sq = frac * frac;

  // sinh(frac) = frac + frac^3 * poly(frac^2)
  float sinh_poly =
      mad(frac_sq,
          mad(frac_sq,
              mad(frac_sq,
                  mad(frac_sq,
                      mad(frac_sq,
                          mad(frac_sq, 0.7746188980094184251527126e-12f,
                              0.160576793121939886190847e-9f),
                          0.250521176994133472333666e-7f),
                      0.275573191913636406057211e-5f),
                  0.198412698413242405162014e-3f),
              0.833333333333329931873097e-2f),
          0.166666666666666667013899e0f);
  float sinh_frac = mad(sinh_poly, frac * frac_sq, frac);

  // cosh(frac) = 1 + frac^2 * poly(frac^2)
  float cosh_poly =
      mad(frac_sq,
          mad(frac_sq,
              mad(frac_sq,
                  mad(frac_sq,
                      mad(frac_sq,
                          mad(frac_sq, 0.1163921388172173692062032e-10f,
                              0.208744349831471353536305e-8f),
                          0.275573350756016588011357e-6f),
                      0.248015872460622433115785e-4f),
                  0.138888888889814854814536e-2f),
              0.416666666666660876512776e-1f),
          0.500000000000000005911074e0f);
  float cosh_frac = mad(cosh_poly, frac_sq, 1.0f);

  // Combine with the tabulated sinh(idx) / cosh(idx), then restore the sign.
  float2 entry = USE_TABLE(sinhcosh_tbl, idx);
  float result = mad(entry.s1, sinh_frac, entry.s0 * cosh_frac);
  result = as_float(sign_bit | as_uint(result));

  // Large-argument path: the negative exponential is negligible, so
  // sinh(|x|) is approximated by exp(|x|) / 2 with the sign of x.
  float shifted_exp = exp(mag - 0x1.62e500p-1f);
  float exp_only = mad(0x1.a0210ep-18f, shifted_exp, shifted_exp);
  exp_only = as_float(sign_bit | as_uint(exp_only));
  result = mag >= exp_only_at ? exp_only : result;

  // Corner cases; later selects take priority over earlier ones.
  float signed_inf = as_float(PINFBITPATT_SP32 | sign_bit);
  result = mag >= overflow_at ? signed_inf : result;
  result = (mag_bits > PINFBITPATT_SP32) || (mag_bits < 0x38800000U) ? x : result;

  return result;
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinh, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Double-precision sinh.
//
// sinh is odd: the computation runs on y = |x| and the sign of x is restored
// with copysign on return.
//
//   y < 2^-28, Inf, NaN  : y
//   y <  small_threshold : y is split into an integer part y0 (table index)
//                          and dy = y - y0, and
//                            sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
//                          with sinh(y0)/cosh(y0) read from sinh_tbl/cosh_tbl
//                          as (lead, tail) pairs.
//   y >= small_threshold : exp(y) / 2, since exp(-y) is insignificant
//   y >= max_sinh_arg    : Inf
//
// All candidates are computed unconditionally and chosen with selects.
_CLC_OVERLOAD _CLC_DEF double sinh(double x) {
  const double max_sinh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e

  // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
  const double small_threshold = 0x1.2b708872320e2p+4;

  double y = fabs(x);

  // Table index in 0..36. The clamp is applied in floating point BEFORE the
  // conversion: converting NaN, Inf or a value >= 2^31 to int is out of range
  // and yields an unspecified (possibly negative) value, which an integer
  // min() would let through to the unconditional table lookups below.
  // fmin returns the non-NaN operand, so NaN maps to 36 as well. For every
  // in-range y this is identical to min((int)y, 36).
  int ind = (int)fmin(y, 36.0);
  double dy = y - ind;
  double dy2 = dy * dy;

  // sinh(dy) - dy
  double sdy = dy * dy2 *
      fma(dy2,
          fma(dy2,
              fma(dy2,
                  fma(dy2,
                      fma(dy2,
                          fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),
                          0.250521176994133472333666e-7),
                      0.275573191913636406057211e-5),
                  0.198412698413242405162014e-3),
              0.833333333333329931873097e-2),
          0.166666666666666667013899e0);

  // cosh(dy) - 1
  double cdy = dy2 * fma(dy2,
                         fma(dy2,
                             fma(dy2,
                                 fma(dy2,
                                     fma(dy2,
                                         fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),
                                         0.275573350756016588011357e-6),
                                     0.248015872460622433115785e-4),
                                 0.138888888889814854814536e-2),
                             0.416666666666660876512776e-1),
                         0.500000000000000005911074e0);

  // At this point sinh(dy) is approximated by dy + sdy.
  // Shift some significant bits from dy to sdy: sdy1 keeps the high bits of
  // dy (low 27 bits masked off), sdy2 carries the remainder plus sdy.
  double sdy1 = as_double(as_ulong(dy) & 0xfffffffff8000000UL);
  double sdy2 = sdy + (dy - sdy1);

  double2 tv = USE_TABLE(cosh_tbl, ind);
  double cl = tv.s0; // cosh(ind), leading part
  double ct = tv.s1; // cosh(ind), trailing part
  tv = USE_TABLE(sinh_tbl, ind);
  double sl = tv.s0; // sinh(ind), leading part
  double st = tv.s1; // sinh(ind), trailing part

  // (sl + st)(1 + cdy) + (cl + ct)(sdy1 + sdy2); sl is added last.
  double z = fma(cl, sdy1, fma(sl, cdy, fma(cl, sdy2, fma(ct, sdy1, fma(st, cdy, ct*sdy2)) + st))) + sl;

  // Other cases
  z = (y < 0x1.0p-28) | isnan(x) | isinf(x) ? y : z;

  // exp(y) / 2, formed as exp(y - c) with a small multiplicative correction.
  double t = exp(y - 0x1.62e42fefa3800p-1);
  t = fma(t, -0x1.ef35793c76641p-45, t);
  z = y >= small_threshold ? t : z;
  z = y >= max_sinh_arg ? as_double(PINFBITPATT_DP64) : z;

  // Restore the sign of the input.
  return copysign(z, x);
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh)
|
||||
|
||||
#endif
|
||||
#define FUNCTION sinh
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -289,139 +289,9 @@ DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
|
||||
230, 139, 2, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
// Tabulated values of sinh(i) and cosh(i) for i = 0,...,36.
|
||||
DECLARE_TABLE(float2, SINHCOSH_TBL, 37) = {
|
||||
(float2)(0x0.000000p+0f, 0x1.000000p+0f),
|
||||
(float2)(0x1.2cd9fcp+0f, 0x1.8b0756p+0f),
|
||||
(float2)(0x1.d03cf6p+1f, 0x1.e18fa0p+1f),
|
||||
(float2)(0x1.40926ep+3f, 0x1.422a4ap+3f),
|
||||
(float2)(0x1.b4a380p+4f, 0x1.b4ee86p+4f),
|
||||
(float2)(0x1.28d016p+6f, 0x1.28d6fcp+6f),
|
||||
(float2)(0x1.936d22p+7f, 0x1.936e68p+7f),
|
||||
(float2)(0x1.122876p+9f, 0x1.122894p+9f),
|
||||
(float2)(0x1.749ea6p+10f, 0x1.749eaap+10f),
|
||||
(float2)(0x1.fa7158p+11f, 0x1.fa7158p+11f),
|
||||
(float2)(0x1.5829dcp+13f, 0x1.5829dep+13f),
|
||||
(float2)(0x1.d3c448p+14f, 0x1.d3c448p+14f),
|
||||
(float2)(0x1.3de166p+16f, 0x1.3de166p+16f),
|
||||
(float2)(0x1.b00b5ap+17f, 0x1.b00b5ap+17f),
|
||||
(float2)(0x1.259ac4p+19f, 0x1.259ac4p+19f),
|
||||
(float2)(0x1.8f0ccap+20f, 0x1.8f0ccap+20f),
|
||||
(float2)(0x1.0f2ebep+22f, 0x1.0f2ebep+22f),
|
||||
(float2)(0x1.709348p+23f, 0x1.709348p+23f),
|
||||
(float2)(0x1.f4f220p+24f, 0x1.f4f220p+24f),
|
||||
(float2)(0x1.546d90p+26f, 0x1.546d90p+26f),
|
||||
(float2)(0x1.ceb088p+27f, 0x1.ceb088p+27f),
|
||||
(float2)(0x1.3a6e20p+29f, 0x1.3a6e20p+29f),
|
||||
(float2)(0x1.ab5adcp+30f, 0x1.ab5adcp+30f),
|
||||
(float2)(0x1.226af4p+32f, 0x1.226af4p+32f),
|
||||
(float2)(0x1.8ab7fcp+33f, 0x1.8ab7fcp+33f),
|
||||
(float2)(0x1.0c3d3ap+35f, 0x1.0c3d3ap+35f),
|
||||
(float2)(0x1.6c9326p+36f, 0x1.6c9326p+36f),
|
||||
(float2)(0x1.ef8230p+37f, 0x1.ef8230p+37f),
|
||||
(float2)(0x1.50bba4p+39f, 0x1.50bba4p+39f),
|
||||
(float2)(0x1.c9aae4p+40f, 0x1.c9aae4p+40f),
|
||||
(float2)(0x1.370470p+42f, 0x1.370470p+42f),
|
||||
(float2)(0x1.a6b766p+43f, 0x1.a6b766p+43f),
|
||||
(float2)(0x1.1f43fcp+45f, 0x1.1f43fcp+45f),
|
||||
(float2)(0x1.866f34p+46f, 0x1.866f34p+46f),
|
||||
(float2)(0x1.0953e2p+48f, 0x1.0953e2p+48f),
|
||||
(float2)(0x1.689e22p+49f, 0x1.689e22p+49f),
|
||||
(float2)(0x1.ea215ap+50f, 0x1.ea215ap+50f)
|
||||
};
|
||||
|
||||
TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
|
||||
TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);
|
||||
|
||||
// Accessor for PIBITS_TBL: forms the address PIBITS_TBL + idx, reinterprets it
// as a pointer to __constant uint4, and returns the uint4 loaded from there.
// Unlike the TABLE_FUNCTION-generated accessors around it, this one is written
// out by hand because the returned type (uint4) differs from the type the
// table is declared with.
// NOTE(review): idx is presumably a byte offset (the table is declared with
// element type uchar) and nothing here enforces uint4 alignment - confirm
// against the callers.
uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
|
||||
return *(__constant uint4 *)(PIBITS_TBL + idx);
|
||||
}
|
||||
|
||||
TABLE_FUNCTION(float2, SINHCOSH_TBL, sinhcosh_tbl);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
DECLARE_TABLE(double2, SINH_TBL, 37) = {
|
||||
(double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0),
|
||||
(double2)(0x1.2cd9fc0000000p+0, 0x1.13ae6096a0092p-26),
|
||||
(double2)(0x1.d03cf60000000p+1, 0x1.db70cfb79a640p-26),
|
||||
(double2)(0x1.40926e0000000p+3, 0x1.c2526b66dc067p-23),
|
||||
(double2)(0x1.b4a3800000000p+4, 0x1.b81b18647f380p-23),
|
||||
(double2)(0x1.28d0160000000p+6, 0x1.bc1cdd1e1eb08p-20),
|
||||
(double2)(0x1.936d228000000p+7, 0x1.d9f201534fb09p-19),
|
||||
(double2)(0x1.1228768000000p+9, 0x1.d1c064a4e9954p-18),
|
||||
(double2)(0x1.749ea50000000p+10, 0x1.4eca65d06ea74p-18),
|
||||
(double2)(0x1.fa71570000000p+11, 0x1.0c259bcc0ecc5p-15),
|
||||
(double2)(0x1.5829dc8000000p+13, 0x1.b5a6647cf9016p-13),
|
||||
(double2)(0x1.d3c4488000000p+14, 0x1.9691adefb0870p-15),
|
||||
(double2)(0x1.3de1650000000p+16, 0x1.3410fc29cde38p-10),
|
||||
(double2)(0x1.b00b590000000p+17, 0x1.6a31a50b6fb3cp-11),
|
||||
(double2)(0x1.259ac48000000p+19, 0x1.7defc71805c40p-10),
|
||||
(double2)(0x1.8f0cca8000000p+20, 0x1.eb49fd80e0babp-6),
|
||||
(double2)(0x1.0f2ebd0000000p+22, 0x1.4fffc7bcd5920p-7),
|
||||
(double2)(0x1.7093488000000p+23, 0x1.03a93b6c63435p-3),
|
||||
(double2)(0x1.f4f2208000000p+24, 0x1.1940bb255fd1cp-4),
|
||||
(double2)(0x1.546d8f8000000p+26, 0x1.ed26e14260b50p-2),
|
||||
(double2)(0x1.ceb0888000000p+27, 0x1.b47401fc9f2a2p+0),
|
||||
(double2)(0x1.3a6e1f8000000p+29, 0x1.67bb3f55634f1p+3),
|
||||
(double2)(0x1.ab5adb8000000p+30, 0x1.c435ff8194ddcp+2),
|
||||
(double2)(0x1.226af30000000p+32, 0x1.d8fee052ba63ap+5),
|
||||
(double2)(0x1.8ab7fb0000000p+33, 0x1.51d7edccde3f6p+7),
|
||||
(double2)(0x1.0c3d390000000p+35, 0x1.04b1644557d1ap+8),
|
||||
(double2)(0x1.6c93268000000p+36, 0x1.6a6b5ca0a9dc4p+8),
|
||||
(double2)(0x1.ef822f0000000p+37, 0x1.fd9cc72249abap+11),
|
||||
(double2)(0x1.50bba30000000p+39, 0x1.e58de693edab5p+13),
|
||||
(double2)(0x1.c9aae40000000p+40, 0x1.8c70158ac6363p+14),
|
||||
(double2)(0x1.3704708000000p+42, 0x1.7614764f43e20p+15),
|
||||
(double2)(0x1.a6b7658000000p+43, 0x1.6337db36fc718p+17),
|
||||
(double2)(0x1.1f43fc8000000p+45, 0x1.12d98b1f611e2p+19),
|
||||
(double2)(0x1.866f348000000p+46, 0x1.392bc108b37ccp+19),
|
||||
(double2)(0x1.0953e28000000p+48, 0x1.ce87bdc3473dcp+22),
|
||||
(double2)(0x1.689e220000000p+49, 0x1.bc8d5ae99ad14p+21),
|
||||
(double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22),
|
||||
};
|
||||
|
||||
DECLARE_TABLE(double2, COSH_TBL, 37) = {
|
||||
(double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
|
||||
(double2)(0x1.8b07550000000p+0, 0x1.d9f5504c2bd28p-28),
|
||||
(double2)(0x1.e18fa08000000p+1, 0x1.7cb66f0a4c9fdp-25),
|
||||
(double2)(0x1.422a490000000p+3, 0x1.f58617928e588p-23),
|
||||
(double2)(0x1.b4ee858000000p+4, 0x1.bc7d000c38d48p-25),
|
||||
(double2)(0x1.28d6fc8000000p+6, 0x1.f7f9d4e329998p-21),
|
||||
(double2)(0x1.936e678000000p+7, 0x1.6e6e464885269p-19),
|
||||
(double2)(0x1.1228948000000p+9, 0x1.ba3a8b946c154p-19),
|
||||
(double2)(0x1.749eaa8000000p+10, 0x1.3f4e76110d5a4p-18),
|
||||
(double2)(0x1.fa71580000000p+11, 0x1.17622515a3e2bp-15),
|
||||
(double2)(0x1.5829dd0000000p+13, 0x1.4dc4b528af3d0p-17),
|
||||
(double2)(0x1.d3c4488000000p+14, 0x1.1156278615e10p-14),
|
||||
(double2)(0x1.3de1650000000p+16, 0x1.35ad50ed821f5p-10),
|
||||
(double2)(0x1.b00b590000000p+17, 0x1.6b61055f2935cp-11),
|
||||
(double2)(0x1.259ac48000000p+19, 0x1.7e2794a601240p-10),
|
||||
(double2)(0x1.8f0cca8000000p+20, 0x1.eb4b45f6aadd3p-6),
|
||||
(double2)(0x1.0f2ebd0000000p+22, 0x1.5000b967b3698p-7),
|
||||
(double2)(0x1.7093488000000p+23, 0x1.03a940fadc092p-3),
|
||||
(double2)(0x1.f4f2208000000p+24, 0x1.1940bf3bf874cp-4),
|
||||
(double2)(0x1.546d8f8000000p+26, 0x1.ed26e1a2a2110p-2),
|
||||
(double2)(0x1.ceb0888000000p+27, 0x1.b4740205796d6p+0),
|
||||
(double2)(0x1.3a6e1f8000000p+29, 0x1.67bb3f55cb85dp+3),
|
||||
(double2)(0x1.ab5adb8000000p+30, 0x1.c435ff81e18acp+2),
|
||||
(double2)(0x1.226af30000000p+32, 0x1.d8fee052bdea4p+5),
|
||||
(double2)(0x1.8ab7fb0000000p+33, 0x1.51d7edccde926p+7),
|
||||
(double2)(0x1.0c3d390000000p+35, 0x1.04b1644557e0ep+8),
|
||||
(double2)(0x1.6c93268000000p+36, 0x1.6a6b5ca0a9e1cp+8),
|
||||
(double2)(0x1.ef822f0000000p+37, 0x1.fd9cc72249abep+11),
|
||||
(double2)(0x1.50bba30000000p+39, 0x1.e58de693edab5p+13),
|
||||
(double2)(0x1.c9aae40000000p+40, 0x1.8c70158ac6364p+14),
|
||||
(double2)(0x1.3704708000000p+42, 0x1.7614764f43e20p+15),
|
||||
(double2)(0x1.a6b7658000000p+43, 0x1.6337db36fc718p+17),
|
||||
(double2)(0x1.1f43fc8000000p+45, 0x1.12d98b1f611e2p+19),
|
||||
(double2)(0x1.866f348000000p+46, 0x1.392bc108b37ccp+19),
|
||||
(double2)(0x1.0953e28000000p+48, 0x1.ce87bdc3473dcp+22),
|
||||
(double2)(0x1.689e220000000p+49, 0x1.bc8d5ae99ad14p+21),
|
||||
(double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22)
|
||||
};
|
||||
|
||||
TABLE_FUNCTION(double2, SINH_TBL, sinh_tbl);
|
||||
TABLE_FUNCTION(double2, COSH_TBL, cosh_tbl);
|
||||
|
||||
#endif // cl_khr_fp64
|
||||
|
@ -7,133 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/clc_tanh.h>
|
||||
|
||||
// Single-precision tanh.
//
// tanh is odd: the sign bit is split off, the magnitude is evaluated, and the
// sign is OR-ed back into the result. The magnitude is handled in ranges:
//   NaN or |x| < 2^-13 : x itself
//   |x| < 0.9          : |x| + |x|^3 * A1(x^2) / B1(x^2)
//   0.9 <= |x| <= 1    : |x| + |x|^3 * A2(x^2) / B2(x^2)
//   |x| > 1            : 1 - 2 / (exp(2|x|) + 1)
//   |x| > saturate_at  : sign(x) * 1
// Every candidate is computed unconditionally and picked with selects.
_CLC_OVERLOAD _CLC_DEF float tanh(float x) {
  const float saturate_at = 0x1.0a2b24p+3f;

  // Split the input into sign bit and magnitude.
  uint bits = as_uint(x);
  uint mag_bits = bits & EXSIGNBIT_SP32;
  uint sign_bit = bits ^ mag_bits;

  float mag = as_float(mag_bits);
  float mag_sq = mag * mag;

  // Rational approximation for |x| < 0.9.
  float num_lo =
      mad(mag_sq,
          mad(mag_sq, 0.4891631088530669873e-4F, -0.14628356048797849e-2F),
          -0.28192806108402678e0F);
  float den_lo = mad(mag_sq, 0.3427017942262751343e0F, 0.845784192581041099e0F);

  // Rational approximation for 0.9 <= |x| <= 1.
  float num_mid =
      mad(mag_sq,
          mad(mag_sq, 0.3827534993599483396e-4F, -0.12325644183611929e-2F),
          -0.24069858695196524e0F);
  float den_mid = mad(mag_sq, 0.292529068698052819e0F, 0.72209738473684982e0F);

  // Pick the coefficient set that matches the magnitude.
  int below_0p9 = mag < 0.9f;
  float num = below_0p9 ? num_lo : num_mid;
  float den = below_0p9 ? den_lo : den_mid;
  float via_rational = mad(MATH_DIVIDE(num, den), mag * mag_sq, mag);

  // Exponential form for magnitudes above 1.
  float exp_plus_one = exp(2.0f * mag) + 1.0f;
  float via_exp = 1.0F - MATH_DIVIDE(2.0F, exp_plus_one);

  float result = mag <= 1.0f ? via_rational : via_exp;
  result = as_float(sign_bit | as_uint(result));

  // Edge cases: saturation to +/-1, then tiny inputs and NaNs return x.
  float signed_one = as_float(0x3f800000U | sign_bit);
  result = mag > saturate_at ? signed_one : result;
  result = (mag_bits < 0x39000000) || (mag_bits > 0x7f800000) ? x : result;

  return result;
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, tanh, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Double-precision tanh.
//
// tanh is odd: the sign bit is split off, the magnitude is evaluated, and the
// sign is OR-ed back into the result on return. The magnitude is handled in
// ranges:
//   |x| < 2^-28 or NaN : x itself
//   |x| < 0.9          : |x| + |x|^3 * P_lo(x^2) / Q_lo(x^2)
//   0.9 <= |x| <= 1    : |x| + |x|^3 * P_mid(x^2) / Q_mid(x^2)
//   |x| > 1            : 1 - 2 / (exp(2|x|) + 1)
//   |x| > ln(2^27)     : 1
// Every candidate is computed unconditionally and picked with selects.
_CLC_OVERLOAD _CLC_DEF double tanh(double x) {
  // ln(2^27): past this point e^-x no longer contributes next to e^x.
  const double saturate_at = 0x1.2b708872320e2p+4;

  // Separate sign and magnitude at the bit level.
  ulong bits = as_ulong(x);
  ulong mag_bits = bits & ~SIGNBIT_DP64;
  ulong sign_bit = bits ^ mag_bits;
  double mag = as_double(mag_bits);
  double mag_sq = mag * mag;

  // Rational approximation used below 0.9: numerator ...
  double num_lo =
      fma(mag_sq,
          fma(mag_sq,
              fma(mag_sq, -0.142077926378834722618091e-7, -0.200047621071909498730453e-3),
              -0.176016349003044679402273e-1),
          -0.274030424656179760118928e0);

  // ... and denominator.
  double den_lo =
      fma(mag_sq,
          fma(mag_sq,
              fma(mag_sq, 0.2091140262529164482568557e-3, 0.201562166026937652780575e-1),
              0.381641414288328849317962e0),
          0.822091273968539282568011e0);

  // Rational approximation used on [0.9, 1]: numerator ...
  double num_mid =
      fma(mag_sq,
          fma(mag_sq,
              fma(mag_sq, -0.115475878996143396378318e-7, -0.165597043903549960486816e-3),
              -0.146173047288731678404066e-1),
          -0.227793870659088295252442e0);

  // ... and denominator.
  double den_mid =
      fma(mag_sq,
          fma(mag_sq,
              fma(mag_sq, 0.173076050126225961768710e-3, 0.167358775461896562588695e-1),
              0.317204558977294374244770e0),
          0.683381611977295894959554e0);

  // Pick the coefficient set that matches the magnitude.
  int below_0p9 = mag < 0.9;
  double num = below_0p9 ? num_lo : num_mid;
  double den = below_0p9 ? den_lo : den_mid;
  double result = mag + mag*mag_sq * MATH_DIVIDE(num, den);

  // Exponential form, selected once the magnitude exceeds 1.
  double exp_plus_one = exp(2.0 * mag) + 1.0;
  double via_exp = 1.0 - 2.0 / exp_plus_one;

  result = mag > 1.0 ? via_exp : result;

  // Tiny magnitudes and NaNs (bit pattern above +Inf) return the input as is.
  result = (mag < 0x1.0p-28) || (mag_bits > PINFBITPATT_DP64) ? x : result;

  // Saturated range.
  result = mag > saturate_at ? 1.0 : result;

  // Re-attach the sign of the input.
  return as_double(sign_bit | as_ulong(result));
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
|
||||
|
||||
#endif // cl_khr_fp64
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)
|
||||
|
||||
#endif
|
||||
#define FUNCTION tanh
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
Loading…
x
Reference in New Issue
Block a user