mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-15 22:06:32 +00:00
[libclc] Move sinpi/cospi/tanpi to the CLC library (#133889)
Additionally, these builtins are now vectorized. This also moves the native_recip and native_divide builtins as they are used by the tanpi builtin.
This commit is contained in:
parent
1d9ad99305
commit
13a313fe58
@ -265,11 +265,13 @@ endif()
|
||||
set_source_files_properties(
|
||||
# CLC builtins
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_cos.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_divide.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp2.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log10.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log2.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_recip.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_rsqrt.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sin.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sqrt.cl
|
||||
|
20
libclc/clc/include/clc/math/clc_cospi.h
Normal file
20
libclc/clc/include/clc/math/clc_cospi.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_COSPI_H__
|
||||
#define __CLC_MATH_CLC_COSPI_H__
|
||||
|
||||
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_cospi
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_COSPI_H__
|
22
libclc/clc/include/clc/math/clc_native_divide.h
Normal file
22
libclc/clc/include/clc/math/clc_native_divide.h
Normal file
@ -0,0 +1,22 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_NATIVE_DIVIDE_H__
|
||||
#define __CLC_MATH_CLC_NATIVE_DIVIDE_H__
|
||||
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_FUNCTION __clc_native_divide
|
||||
#define __CLC_BODY <clc/shared/binary_decl.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __FLOAT_ONLY
|
||||
|
||||
#endif // __CLC_MATH_CLC_NATIVE_DIVIDE_H__
|
22
libclc/clc/include/clc/math/clc_native_recip.h
Normal file
22
libclc/clc/include/clc/math/clc_native_recip.h
Normal file
@ -0,0 +1,22 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_NATIVE_RECIP_H__
|
||||
#define __CLC_MATH_CLC_NATIVE_RECIP_H__
|
||||
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_FUNCTION __clc_native_recip
|
||||
#define __CLC_BODY <clc/shared/unary_decl.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __FLOAT_ONLY
|
||||
|
||||
#endif // __CLC_MATH_CLC_NATIVE_RECIP_H__
|
@ -10,6 +10,8 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
|
||||
__CLC_FLOATN y);
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
|
||||
__CLC_FLOATN y);
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
|
||||
__CLC_INTN regn);
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_FLOATN *r,
|
||||
private __CLC_FLOATN *rr,
|
||||
|
14
libclc/clc/include/clc/math/clc_sincos_piby4.h
Normal file
14
libclc/clc/include/clc/math/clc_sincos_piby4.h
Normal file
@ -0,0 +1,14 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_SINCOS_PIBY4_H__
#define __CLC_MATH_CLC_SINCOS_PIBY4_H__

// This header instantiates inline function definitions (not just
// declarations) for every gentype, so it must only be expanded once per
// translation unit; the guard makes repeated inclusion harmless.

#include <clc/math/clc_fma.h>
#include <clc/math/clc_mad.h>
#include <clc/math/math.h>

// Expand clc_sincos_piby4.inc once for each type gentype.inc iterates over.
#define __CLC_BODY <clc/math/clc_sincos_piby4.inc>
#include <clc/math/gentype.inc>

#endif // __CLC_MATH_CLC_SINCOS_PIBY4_H__
|
174
libclc/clc/include/clc/math/clc_sincos_piby4.inc
Normal file
174
libclc/clc/include/clc/math/clc_sincos_piby4.inc
Normal file
@ -0,0 +1,174 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
|
||||
_CLC_INLINE _CLC_OVERLOAD void
|
||||
__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval,
|
||||
private __CLC_GENTYPE *cosval) {
|
||||
// Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
|
||||
// = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
|
||||
// = x * f(w)
|
||||
// where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
|
||||
// We use a minimax approximation of (f(w) - 1) / w
|
||||
// because this produces an expansion in even powers of x.
|
||||
|
||||
// Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
|
||||
// = f(w)
|
||||
// where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
|
||||
// We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
|
||||
// because this produces an expansion in even powers of x.
|
||||
|
||||
const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F;
|
||||
const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F;
|
||||
const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F;
|
||||
const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F;
|
||||
|
||||
const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F;
|
||||
const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F;
|
||||
const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F;
|
||||
const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F;
|
||||
|
||||
__CLC_GENTYPE x2 = x * x;
|
||||
|
||||
*sinval = __clc_mad(
|
||||
x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
|
||||
x);
|
||||
*cosval = __clc_mad(
|
||||
x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
|
||||
__clc_mad(x2, -0.5f, 1.0f));
|
||||
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
_CLC_INLINE _CLC_OVERLOAD void
|
||||
__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx,
|
||||
private __CLC_GENTYPE *sinval,
|
||||
private __CLC_GENTYPE *cosval) {
|
||||
// Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
|
||||
// = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
|
||||
// = x * f(w)
|
||||
// where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
|
||||
// We use a minimax approximation of (f(w) - 1) / w
|
||||
// because this produces an expansion in even powers of x.
|
||||
// If xx (the tail of x) is non-zero, we add a correction
|
||||
// term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
|
||||
// is an approximation to cos(x)*sin(xx) valid because
|
||||
// xx is tiny relative to x.
|
||||
|
||||
// Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
|
||||
// = f(w)
|
||||
// where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
|
||||
// We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
|
||||
// because this produces an expansion in even powers of x.
|
||||
// If xx (the tail of x) is non-zero, we subtract a correction
|
||||
// term g(x,xx) = x*xx to the result, where g(x,xx)
|
||||
// is an approximation to sin(x)*sin(xx) valid because
|
||||
// xx is tiny relative to x.
|
||||
|
||||
const __CLC_GENTYPE sc1 = -0.166666666666666646259241729;
|
||||
const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2;
|
||||
const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3;
|
||||
const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5;
|
||||
const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7;
|
||||
const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9;
|
||||
|
||||
const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1;
|
||||
const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2;
|
||||
const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4;
|
||||
const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6;
|
||||
const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8;
|
||||
const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10;
|
||||
|
||||
__CLC_GENTYPE x2 = x * x;
|
||||
__CLC_GENTYPE x3 = x2 * x;
|
||||
__CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2;
|
||||
__CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r;
|
||||
|
||||
__CLC_GENTYPE sp = __clc_fma(
|
||||
__clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
|
||||
|
||||
__CLC_GENTYPE cp =
|
||||
t +
|
||||
__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
|
||||
x2, cc4),
|
||||
x2, cc3),
|
||||
x2, cc2),
|
||||
x2, cc1),
|
||||
x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
|
||||
|
||||
*sinval =
|
||||
x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
|
||||
*cosval = cp;
|
||||
}
|
||||
|
||||
_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x,
|
||||
__CLC_GENTYPE xx,
|
||||
private __CLC_GENTYPE *leadval,
|
||||
private __CLC_GENTYPE *tailval) {
|
||||
// 0x3fe921fb54442d18
|
||||
const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01;
|
||||
// 0x3c81a62633145c06
|
||||
const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17;
|
||||
|
||||
// In order to maintain relative precision transform using the identity:
|
||||
// tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
|
||||
// Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
|
||||
|
||||
__CLC_LONGN ca = x > 0.68;
|
||||
__CLC_LONGN cb = x < -0.68;
|
||||
__CLC_GENTYPE transform = ca ? 1.0 : 0.0;
|
||||
transform = cb ? -1.0 : transform;
|
||||
|
||||
__CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) +
|
||||
__clc_fma(-transform, xx, piby4_tail);
|
||||
__CLC_LONGN c = ca | cb;
|
||||
x = c ? tx : x;
|
||||
xx = c ? 0.0 : xx;
|
||||
|
||||
// Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
|
||||
__CLC_GENTYPE t1 = x;
|
||||
__CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x);
|
||||
|
||||
__CLC_GENTYPE a = __clc_fma(r,
|
||||
__clc_fma(r, 0.224044448537022097264602535574e-3,
|
||||
-0.229345080057565662883358588111e-1),
|
||||
0.372379159759792203640806338901e0);
|
||||
|
||||
__CLC_GENTYPE b =
|
||||
__clc_fma(r,
|
||||
__clc_fma(r,
|
||||
__clc_fma(r, -0.232371494088563558304549252913e-3,
|
||||
0.260656620398645407524064091208e-1),
|
||||
-0.515658515729031149329237816945e0),
|
||||
0.111713747927937668539901657944e1);
|
||||
|
||||
__CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
|
||||
|
||||
__CLC_GENTYPE tp = t1 + t2;
|
||||
|
||||
// Compute -1.0/(t1 + t2) accurately
|
||||
__CLC_GENTYPE z1 =
|
||||
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
|
||||
__CLC_GENTYPE z2 = t2 - (z1 - t1);
|
||||
__CLC_GENTYPE trec = -MATH_RECIP(tp);
|
||||
__CLC_GENTYPE trec_top =
|
||||
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
|
||||
|
||||
__CLC_GENTYPE tpr = __clc_fma(
|
||||
__clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
|
||||
|
||||
__CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
|
||||
__CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
|
||||
|
||||
*leadval = c ? tpt : tp;
|
||||
*tailval = c ? tptr : tpr;
|
||||
}
|
||||
|
||||
#endif
|
20
libclc/clc/include/clc/math/clc_sinpi.h
Normal file
20
libclc/clc/include/clc/math/clc_sinpi.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_SINPI_H__
|
||||
#define __CLC_MATH_CLC_SINPI_H__
|
||||
|
||||
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_sinpi
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_SINPI_H__
|
20
libclc/clc/include/clc/math/clc_tanpi.h
Normal file
20
libclc/clc/include/clc/math/clc_tanpi.h
Normal file
@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MATH_CLC_TANPI_H__
|
||||
#define __CLC_MATH_CLC_TANPI_H__
|
||||
|
||||
#define __CLC_BODY <clc/math/unary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_tanpi
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_TANPI_H__
|
@ -30,6 +30,7 @@ math/clc_atanh.cl
|
||||
math/clc_atanpi.cl
|
||||
math/clc_ceil.cl
|
||||
math/clc_copysign.cl
|
||||
math/clc_cospi.cl
|
||||
math/clc_ep_log.cl
|
||||
math/clc_fabs.cl
|
||||
math/clc_fma.cl
|
||||
@ -46,12 +47,14 @@ math/clc_mad.cl
|
||||
math/clc_modf.cl
|
||||
math/clc_nan.cl
|
||||
math/clc_native_cos.cl
|
||||
math/clc_native_divide.cl
|
||||
math/clc_native_exp.cl
|
||||
math/clc_native_exp2.cl
|
||||
math/clc_native_log.cl
|
||||
math/clc_native_log10.cl
|
||||
math/clc_native_log2.cl
|
||||
math/clc_native_rsqrt.cl
|
||||
math/clc_native_recip.cl
|
||||
math/clc_native_sin.cl
|
||||
math/clc_native_sqrt.cl
|
||||
math/clc_nextafter.cl
|
||||
@ -65,9 +68,11 @@ math/clc_rootn.cl
|
||||
math/clc_round.cl
|
||||
math/clc_rsqrt.cl
|
||||
math/clc_sincos_helpers.cl
|
||||
math/clc_sinpi.cl
|
||||
math/clc_sqrt.cl
|
||||
math/clc_sw_fma.cl
|
||||
math/clc_tables.cl
|
||||
math/clc_tanpi.cl
|
||||
math/clc_trunc.cl
|
||||
relational/clc_all.cl
|
||||
relational/clc_any.cl
|
||||
|
18
libclc/clc/lib/generic/math/clc_cospi.cl
Normal file
18
libclc/clc/lib/generic/math/clc_cospi.cl
Normal file
@ -0,0 +1,18 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_fabs.h>
|
||||
#include <clc/math/clc_sincos_helpers.h>
|
||||
#include <clc/math/clc_sincos_piby4.h>
|
||||
#include <clc/math/math.h>
|
||||
|
||||
#define __CLC_BODY <clc_cospi.inc>
|
||||
#include <clc/math/gentype.inc>
|
116
libclc/clc/lib/generic/math/clc_cospi.inc
Normal file
116
libclc/clc/lib/generic/math/clc_cospi.inc
Normal file
@ -0,0 +1,116 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision cospi: cos(pi * x).
//
// |x| is split into an integer part n and a remainder frac, and
// cos(pi * (n + frac)) = (-1)^n * cos(pi * frac) is evaluated by folding
// frac into [0, 0.25] and picking sin or cos of the folded argument.
// Everything is done with selects so the same code serves vector types.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
  // cos is even, so only |x| is needed.
  __CLC_GENTYPE mag = __clc_fabs(x);
  __CLC_INTN mag_bits = __CLC_AS_INTN(mag);

  __CLC_INTN whole = __CLC_CONVERT_INTN(mag);
  __CLC_GENTYPE frac = mag - __CLC_CONVERT_GENTYPE(whole);

  // Sign-bit mask that is set when the integer part is odd.
  __CLC_INTN odd_sign =
      (whole & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0;

  // Defaults describe 0.75 < frac < 1.0:
  //   cos(pi*frac) = -cos(pi*(1 - frac))
  __CLC_GENTYPE arg = 1.0f - frac;
  __CLC_INTN use_cos = 1;
  __CLC_INTN sign = odd_sign ^ (__CLC_INTN)0x80000000;

  // frac <= 0.75: cos(pi*frac) = -sin(pi*(frac - 0.5))
  __CLC_INTN sel = frac <= 0.75f;
  arg = sel ? frac - 0.5f : arg;
  use_cos = sel ? 0 : use_cos;

  // frac < 0.5: cos(pi*frac) = sin(pi*(0.5 - frac))
  sel = frac < 0.5f;
  arg = sel ? 0.5f - frac : arg;
  sign = sel ? odd_sign : sign;

  // frac <= 0.25: cos(pi*frac) evaluated directly
  sel = frac <= 0.25f;
  arg = sel ? frac : arg;
  use_cos = sel ? 1 : use_cos;

  __CLC_GENTYPE sin_part, cos_part;
  __clc_sincos_piby4(arg * M_PI_F, &sin_part, &cos_part);
  __CLC_INTN folded = sign ^ __CLC_AS_INTN(use_cos != 0 ? cos_part : sin_part);

  // Pick the result by magnitude, from the largest range down.
  // +-Inf and NaN return NaN.
  __CLC_INTN result = QNANBITPATT_SP32;
  // 2^24 <= |x| < Inf: x is an even integer, so the result is 1.0.
  result = mag_bits < PINFBITPATT_SP32 ? 0x3f800000 : result;
  // 2^23 <= |x| < 2^24: x is an integer, so the result is +-1.0 by parity.
  result = mag_bits < 0x4b800000 ? (odd_sign | 0x3f800000) : result;
  // |x| < 2^23: use the folded polynomial evaluation.
  result = mag_bits < 0x4b000000 ? folded : result;

  return __CLC_AS_GENTYPE(result);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision cospi: cos(pi * x).
//
// |x| is split into an integer part n and a remainder frac, and
// cos(pi * (n + frac)) = (-1)^n * cos(pi * frac) is evaluated by folding
// frac into [0, 0.25] and picking sin or cos of the folded argument.
// Everything is done with selects so the same code serves vector types.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
  // cos is even, so only |x| is needed.
  __CLC_GENTYPE mag = __clc_fabs(x);
  __CLC_LONGN mag_bits = __CLC_AS_LONGN(mag);

  __CLC_LONGN whole = __CLC_CONVERT_LONGN(mag);
  __CLC_GENTYPE frac = mag - __CLC_CONVERT_GENTYPE(whole);

  // Sign-bit mask that is set when the integer part is odd.
  __CLC_LONGN odd_sign =
      (whole & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L;

  // Defaults describe 0.75 < frac < 1.0:
  //   cos(pi*frac) = -cos(pi*(1 - frac))
  __CLC_GENTYPE arg = 1.0 - frac;
  __CLC_LONGN use_cos = 1;
  __CLC_LONGN sign = odd_sign ^ (__CLC_LONGN)0x8000000000000000L;

  // frac <= 0.75: cos(pi*frac) = -sin(pi*(frac - 0.5))
  __CLC_LONGN sel = frac <= 0.75;
  arg = sel ? frac - 0.5 : arg;
  use_cos = sel ? 0 : use_cos;

  // frac < 0.5: cos(pi*frac) = sin(pi*(0.5 - frac))
  sel = frac < 0.5;
  arg = sel ? 0.5 - frac : arg;
  sign = sel ? odd_sign : sign;

  // frac <= 0.25: cos(pi*frac) evaluated directly
  sel = frac <= 0.25;
  arg = sel ? frac : arg;
  use_cos = sel ? 1 : use_cos;

  __CLC_GENTYPE sin_part, cos_part;
  __clc_sincos_piby4(arg * M_PI, 0.0, &sin_part, &cos_part);
  __CLC_LONGN folded =
      sign ^ __CLC_AS_LONGN(use_cos != 0 ? cos_part : sin_part);

  // Pick the result by magnitude, from the largest range down.
  // +-Inf and NaN return NaN.
  __CLC_LONGN result = QNANBITPATT_DP64;
  // 2^53 <= |x| < Inf: x is an even integer, so the result is 1.0.
  result = mag_bits < PINFBITPATT_DP64 ? 0x3ff0000000000000L : result;
  // 2^52 <= |x| < 2^53: x is an integer, so the result is +-1.0 by parity.
  result = mag < 0x1.0p+53 ? (odd_sign | 0x3ff0000000000000L) : result;
  // |x| < 2^52: use the folded polynomial evaluation.
  result = mag < 0x1.0p+52 ? folded : result;

  return __CLC_AS_GENTYPE(result);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision cospi: convert the argument to float, evaluate the float
// overload, and convert the result back to the half gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
  __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_cospi(__CLC_CONVERT_FLOATN(x)));
  return narrowed;
}
|
||||
|
||||
#endif
|
14
libclc/clc/lib/generic/math/clc_native_divide.cl
Normal file
14
libclc/clc/lib/generic/math/clc_native_divide.cl
Normal file
@ -0,0 +1,14 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <clc_native_divide.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
12
libclc/clc/lib/generic/math/clc_native_divide.inc
Normal file
12
libclc/clc/lib/generic/math/clc_native_divide.inc
Normal file
@ -0,0 +1,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// CLC implementation of native_divide: the plain (element-wise for vector
// types) quotient x / y, with no special-case handling.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_divide(__CLC_GENTYPE x,
                                                         __CLC_GENTYPE y) {
  __CLC_GENTYPE quotient = x / y;
  return quotient;
}
|
@ -6,6 +6,9 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_divide(__CLC_GENTYPE x, __CLC_GENTYPE y) {
|
||||
return x / y;
|
||||
}
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <clc_native_recip.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
@ -6,6 +6,6 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_recip(__CLC_GENTYPE val) {
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_recip(__CLC_GENTYPE val) {
|
||||
return 1.0f / val;
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_fma.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/clc_native_divide.h>
|
||||
#include <clc/math/clc_trunc.h>
|
||||
#include <clc/math/math.h>
|
||||
|
||||
|
@ -74,6 +74,25 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
|
||||
return ret;
|
||||
}
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
|
||||
__CLC_INTN regn) {
|
||||
// Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
|
||||
__CLC_FLOATN r = x * x;
|
||||
|
||||
__CLC_FLOATN a =
|
||||
__clc_mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f);
|
||||
|
||||
__CLC_FLOATN b = __clc_mad(
|
||||
r,
|
||||
__clc_mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f),
|
||||
1.15588821434688393452299f);
|
||||
|
||||
__CLC_FLOATN t = __clc_mad(x * r, __clc_native_divide(a, b), x);
|
||||
__CLC_FLOATN tr = -MATH_RECIP(t);
|
||||
|
||||
return regn & 1 ? tr : t;
|
||||
}
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD void __clc_fullMulS(private __CLC_FLOATN *hi,
|
||||
private __CLC_FLOATN *lo,
|
||||
__CLC_FLOATN a, __CLC_FLOATN b,
|
||||
|
18
libclc/clc/lib/generic/math/clc_sinpi.cl
Normal file
18
libclc/clc/lib/generic/math/clc_sinpi.cl
Normal file
@ -0,0 +1,18 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_fabs.h>
|
||||
#include <clc/math/clc_sincos_helpers.h>
|
||||
#include <clc/math/clc_sincos_piby4.h>
|
||||
#include <clc/math/math.h>
|
||||
|
||||
#define __CLC_BODY <clc_sinpi.inc>
|
||||
#include <clc/math/gentype.inc>
|
114
libclc/clc/lib/generic/math/clc_sinpi.inc
Normal file
114
libclc/clc/lib/generic/math/clc_sinpi.inc
Normal file
@ -0,0 +1,114 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision sinpi: sin(pi * x).
//
// |x| is split into an integer part n and a remainder frac, and
// sin(pi * (n + frac)) = (-1)^n * sin(pi * frac) is evaluated by folding
// frac into [0, 0.25] and picking sin or cos of the folded argument. sin is
// odd, so the sign of x is applied to the result. Everything is done with
// selects so the same code serves vector types.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
  __CLC_INTN x_bits = __CLC_AS_INTN(x);
  __CLC_INTN sign_of_x = x_bits & (__CLC_INTN)0x80000000;
  // Bit pattern of |x|.
  __CLC_INTN mag_bits = x_bits ^ sign_of_x;

  __CLC_GENTYPE mag = __clc_fabs(x);
  __CLC_INTN whole = __CLC_CONVERT_INTN(mag);
  __CLC_GENTYPE frac = mag - __CLC_CONVERT_GENTYPE(whole);

  // Result sign: sign of x, flipped when the integer part is odd.
  __CLC_INTN result_sign =
      sign_of_x ^ ((whole & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0);

  // Defaults describe 0.75 < frac < 1.0:
  //   sin(pi*frac) = sin(pi*(1 - frac))
  __CLC_GENTYPE arg = 1.0f - frac;
  __CLC_INTN use_cos = 0;

  // frac <= 0.75: sin(pi*frac) = cos(pi*(frac - 0.5))
  __CLC_INTN sel = frac <= 0.75f;
  arg = sel ? frac - 0.5f : arg;
  use_cos = sel ? 1 : use_cos;

  // frac < 0.5: sin(pi*frac) = cos(pi*(0.5 - frac))
  sel = frac < 0.5f;
  arg = sel ? 0.5f - frac : arg;

  // frac <= 0.25: sin(pi*frac) evaluated directly
  sel = frac <= 0.25f;
  arg = sel ? frac : arg;
  use_cos = sel ? 0 : use_cos;

  __CLC_GENTYPE sin_part, cos_part;
  __clc_sincos_piby4(arg * M_PI_F, &sin_part, &cos_part);
  __CLC_INTN folded =
      result_sign ^ __CLC_AS_INTN(use_cos != 0 ? cos_part : sin_part);

  // Pick the result by magnitude, from the largest range down.
  // +-Inf and NaN return NaN.
  __CLC_INTN result = QNANBITPATT_SP32;
  // 2^23 <= |x| < Inf: x is an integer, so the result is a zero carrying the
  // sign of x.
  result = mag_bits < PINFBITPATT_SP32 ? sign_of_x : result;
  // |x| < 2^23: use the folded polynomial evaluation.
  result = mag_bits < 0x4b000000 ? folded : result;

  return __CLC_AS_GENTYPE(result);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Double-precision sinpi: sin(pi * x).
//
// |x| is split into an integer part n and a remainder frac, and
// sin(pi * (n + frac)) = (-1)^n * sin(pi * frac) is evaluated by folding
// frac into [0, 0.25] and picking sin or cos of the folded argument. sin is
// odd, so the sign of x is applied to the result. Everything is done with
// selects so the same code serves vector types.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
  __CLC_LONGN x_bits = __CLC_AS_LONGN(x);
  __CLC_LONGN sign_of_x = x_bits & (__CLC_LONGN)0x8000000000000000L;
  // Bit pattern of |x|.
  __CLC_LONGN mag_bits = x_bits ^ sign_of_x;

  __CLC_GENTYPE mag = __clc_fabs(x);
  __CLC_LONGN whole = __CLC_CONVERT_LONGN(mag);
  __CLC_GENTYPE frac = mag - __CLC_CONVERT_GENTYPE(whole);

  // Result sign: sign of x, flipped when the integer part is odd.
  __CLC_LONGN result_sign =
      sign_of_x ^
      ((whole & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L);

  // Defaults describe 0.75 < frac < 1.0:
  //   sin(pi*frac) = sin(pi*(1 - frac))
  __CLC_GENTYPE arg = 1.0 - frac;
  __CLC_LONGN use_cos = 0;

  // frac <= 0.75: sin(pi*frac) = cos(pi*(frac - 0.5))
  __CLC_LONGN sel = frac <= 0.75;
  arg = sel ? frac - 0.5 : arg;
  use_cos = sel ? 1 : use_cos;

  // frac < 0.5: sin(pi*frac) = cos(pi*(0.5 - frac))
  sel = frac < 0.5;
  arg = sel ? 0.5 - frac : arg;

  // frac <= 0.25: sin(pi*frac) evaluated directly
  sel = frac <= 0.25;
  arg = sel ? frac : arg;
  use_cos = sel ? 0 : use_cos;

  __CLC_GENTYPE scaled = arg * M_PI;

  __CLC_GENTYPE sin_part, cos_part;
  __clc_sincos_piby4(scaled, 0.0, &sin_part, &cos_part);
  __CLC_LONGN folded =
      result_sign ^ __CLC_AS_LONGN(use_cos != 0 ? cos_part : sin_part);

  // Pick the result by magnitude, from the largest range down.
  // +-Inf and NaN return NaN.
  __CLC_LONGN result = QNANBITPATT_DP64;
  // 2^52 <= |x| < Inf: x is an integer, so the result is a zero carrying the
  // sign of x.
  result = mag_bits < PINFBITPATT_DP64 ? sign_of_x : result;
  // |x| < 2^52: use the folded polynomial evaluation.
  result = mag < 0x1.0p+52 ? folded : result;

  return __CLC_AS_GENTYPE(result);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision sinpi: convert the argument to float, evaluate the float
// overload, and convert the result back to the half gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
  __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_sinpi(__CLC_CONVERT_FLOATN(x)));
  return narrowed;
}
|
||||
|
||||
#endif
|
19
libclc/clc/lib/generic/math/clc_tanpi.cl
Normal file
19
libclc/clc/lib/generic/math/clc_tanpi.cl
Normal file
@ -0,0 +1,19 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_fabs.h>
|
||||
#include <clc/math/clc_native_recip.h>
|
||||
#include <clc/math/clc_sincos_helpers.h>
|
||||
#include <clc/math/clc_sincos_piby4.h>
|
||||
#include <clc/math/math.h>
|
||||
|
||||
#define __CLC_BODY <clc_tanpi.inc>
|
||||
#include <clc/math/gentype.inc>
|
132
libclc/clc/lib/generic/math/clc_tanpi.inc
Normal file
132
libclc/clc/lib/generic/math/clc_tanpi.inc
Normal file
@ -0,0 +1,132 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
|
||||
// Single-precision tanpi: tan(pi * x).
//
// |x| is split into an integer part and a remainder frac. tan has period pi,
// so only frac matters for the magnitude; frac is folded into [0, 0.25] and
// either tan or -1/tan of the folded argument is used, with the sign chosen
// per quarter. tan is odd, so the sign of x is carried into the result.
// Everything is done with selects so the same code serves vector types.
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
  __CLC_INTN x_bits = __CLC_AS_INTN(x);
  __CLC_INTN sign_of_x = x_bits & (__CLC_INTN)SIGNBIT_SP32;
  __CLC_INTN flipped_sign = sign_of_x ^ (__CLC_INTN)SIGNBIT_SP32;
  // Bit pattern of |x|.
  __CLC_INTN mag_bits = x_bits ^ sign_of_x;

  __CLC_GENTYPE mag = __clc_fabs(x);
  __CLC_INTN whole = __CLC_CONVERT_INTN(mag);
  __CLC_GENTYPE frac = mag - __CLC_CONVERT_GENTYPE(whole);

  // Sign of x, flipped when the integer part is odd.
  __CLC_INTN parity_sign =
      sign_of_x ^ __CLC_AS_INTN((whole & 0x1) != 0 ? SIGNBIT_SP32 : 0);

  // Defaults describe 0.75 < frac < 1.0:
  //   tan(pi*frac) = -tan(pi*(1 - frac))
  __CLC_GENTYPE arg = 1.0f - frac;
  __CLC_INTN use_recip = 0;
  __CLC_INTN sign = flipped_sign;

  // frac <= 0.75: tan(pi*frac) = -1/tan(pi*(frac - 0.5))
  __CLC_INTN sel = frac <= 0.75f;
  arg = sel ? frac - 0.5f : arg;
  use_recip = sel ? 1 : use_recip;
  sign = sel ? sign_of_x : sign;

  // frac < 0.5: tan(pi*frac) = 1/tan(pi*(0.5 - frac))
  sel = frac < 0.5f;
  arg = sel ? 0.5f - frac : arg;
  sign = sel ? flipped_sign : sign;

  // frac <= 0.25: tan(pi*frac) evaluated directly
  sel = frac <= 0.25f;
  arg = sel ? frac : arg;
  use_recip = sel ? 0 : use_recip;
  sign = sel ? sign_of_x : sign;

  __CLC_GENTYPE tan_part = __clc_tanf_piby4(arg * M_PI_F, 0);
  __CLC_GENTYPE neg_cot_part = -__clc_native_recip(tan_part);
  __CLC_INTN folded =
      sign ^ __CLC_AS_INTN(use_recip != 0 ? neg_cot_part : tan_part);

  // Exactly halfway between integers the tangent has a pole: return an
  // infinity whose sign follows parity_sign.
  folded = frac == 0.5f ? (parity_sign | 0x7f800000) : folded;

  // Pick the result by magnitude, from the largest range down.
  // +-Inf and NaN return NaN.
  __CLC_INTN result = QNANBITPATT_SP32;
  // 2^24 <= |x| < Inf: x is an even integer; zero with the sign of x.
  result = mag_bits < PINFBITPATT_SP32 ? sign_of_x : result;
  // 2^23 <= |x| < 2^24: x is an integer; zero whose sign follows parity.
  result = mag_bits < 0x4b800000 ? parity_sign : result;
  // |x| < 2^23: use the folded evaluation.
  result = mag_bits < 0x4b000000 ? folded : result;

  return __CLC_AS_GENTYPE(result);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
// Computes tan(pi * x) for the double-precision gentype (scalar or vector).
//
// Same scheme as the single-precision variant: split |x| into an integer part
// and a fraction, fold the fraction into [0, 0.25], evaluate the pi/4 tangent
// kernel there, and assemble signs and special values as raw bit patterns
// using selects (later selects override earlier ones).
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
  // Bit-level pieces of the input: sign bit, inverted sign bit, bits of |x|.
  const __CLC_LONGN x_bits = __CLC_AS_LONGN(x);
  const __CLC_LONGN sign = x_bits & (__CLC_LONGN)0x8000000000000000L;
  const __CLC_LONGN flipped_sign = sign ^ (__CLC_LONGN)0x8000000000000000L;
  const __CLC_LONGN abs_bits = x_bits ^ sign;

  // Integer part and fractional part of |x|.
  const __CLC_GENTYPE abs_x = __clc_fabs(x);
  const __CLC_LONGN whole = __CLC_CONVERT_LONGN(abs_x);
  const __CLC_GENTYPE frac = abs_x - __CLC_CONVERT_GENTYPE(whole);

  // Sign of x, flipped when the integer part of |x| is odd.
  const __CLC_LONGN odd_sign =
      sign ^ __CLC_AS_LONGN((whole & 0x1) != 0 ? 0x8000000000000000L : 0L);

  // 0.75 < frac < 1: tan(pi*frac) = -tan(pi*(1 - frac)).
  __CLC_GENTYPE arg = 1.0 - frac;
  __CLC_LONGN use_second = 0;
  __CLC_LONGN out_sign = flipped_sign;

  // 0.5 < frac <= 0.75: tan(pi*frac) = -1 / tan(pi*(frac - 0.5)).
  __CLC_LONGN in_range = frac <= 0.75;
  arg = in_range ? frac - 0.5 : arg;
  use_second = in_range ? 1 : use_second;
  out_sign = in_range ? sign : out_sign;

  // 0.25 < frac < 0.5: tan(pi*frac) = 1 / tan(pi*(0.5 - frac)).
  in_range = frac < 0.5;
  arg = in_range ? 0.5 - frac : arg;
  out_sign = in_range ? flipped_sign : out_sign;

  // frac <= 0.25: evaluate tan(pi*frac) directly.
  in_range = frac <= 0.25;
  arg = in_range ? frac : arg;
  use_second = in_range ? 0 : use_second;
  out_sign = in_range ? sign : out_sign;

  // NOTE(review): the two outputs presumably receive tan(arg) and
  // -1/tan(arg) respectively; confirm against the __clc_tan_piby4 definition.
  const __CLC_GENTYPE scaled = arg * M_PI;
  __CLC_GENTYPE first, second;
  __clc_tan_piby4(scaled, 0.0, &first, &second);
  __CLC_LONGN reduced =
      out_sign ^ __CLC_AS_LONGN(use_second != 0 ? second : first);

  // Exactly half-way between two integers: signed infinity.
  const __CLC_LONGN signed_inf = odd_sign | 0x7ff0000000000000L;
  reduced = frac == 0.5 ? signed_inf : reduced;

  // Start from the quiet NaN returned for +-Inf and NaN inputs.
  __CLC_LONGN result = QNANBITPATT_DP64;

  // 2^53 <= |x| < Inf: x is an even integer, the result is a signed zero.
  result = abs_bits < PINFBITPATT_DP64 ? sign : result;

  // 2^52 <= |x| < 2^53: x is an integer, zero sign depends on its parity.
  result = abs_bits < 0x4340000000000000L ? odd_sign : result;

  // |x| < 2^52: use the value computed from the folded fraction.
  result = abs_bits < 0x4330000000000000L ? reduced : result;

  return __CLC_AS_GENTYPE(result);
}
|
||||
|
||||
#elif __CLC_FPSIZE == 16
|
||||
|
||||
// Half-precision tanpi: converts the argument with __CLC_CONVERT_FLOATN,
// forwards it to the __clc_tanpi overload for that type, and converts the
// result back to the half-precision gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) {
  return __CLC_CONVERT_GENTYPE(
      __clc_tanpi(
          __CLC_CONVERT_FLOATN(x)));
}
|
||||
|
||||
#endif
|
@ -18,7 +18,6 @@ subnormal_config.cl
|
||||
../../generic/lib/math/cbrt.cl
|
||||
../../generic/lib/math/clc_exp10.cl
|
||||
../../generic/lib/math/clc_tan.cl
|
||||
../../generic/lib/math/clc_tanpi.cl
|
||||
../../generic/lib/math/cos.cl
|
||||
../../generic/lib/math/cosh.cl
|
||||
../../generic/lib/math/cospi.cl
|
||||
|
@ -174,7 +174,6 @@ math/sqrt.cl
|
||||
math/clc_tan.cl
|
||||
math/tan.cl
|
||||
math/tanh.cl
|
||||
math/clc_tanpi.cl
|
||||
math/tanpi.cl
|
||||
math/tgamma.cl
|
||||
math/trunc.cl
|
||||
|
@ -35,7 +35,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) {
|
||||
_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#include "sincosD_piby4.h"
|
||||
#include <clc/math/clc_sincos_piby4.h>
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
|
||||
double y = __clc_fabs(x);
|
||||
@ -48,9 +48,10 @@ _CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
|
||||
else
|
||||
__clc_remainder_piby2_large(y, &r, &rr, &regn);
|
||||
|
||||
double2 tt = __clc_tan_piby4(r, rr);
|
||||
double lead, tail;
|
||||
__clc_tan_piby4(r, rr, &lead, &tail);
|
||||
|
||||
int2 t = as_int2(regn & 1 ? tt.y : tt.x);
|
||||
int2 t = as_int2(regn & 1 ? tail : lead);
|
||||
t.hi ^= (x < 0.0) << 31;
|
||||
|
||||
return __clc_isnan(x) || __clc_isinf(x) ? as_double(QNANBITPATT_DP64)
|
||||
|
@ -1,132 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "sincos_helpers.h"
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/tables.h>
|
||||
|
||||
// Scalar single-precision tanpi: returns tan(pi * x).
//
// |x| is split into an integer part and a fraction in [0, 1). The fraction is
// folded into [0, 0.25], __clc_tanf_piby4 is evaluated there, and either that
// value or its negated reciprocal is used. Signs and special values are built
// directly as bit patterns.
_CLC_DEF _CLC_OVERLOAD float __clc_tanpi(float x) {
  // Bit-level pieces of the input: sign bit, inverted sign bit, bits of |x|.
  const int x_bits = as_int(x);
  const int sign = x_bits & 0x80000000;
  const int flipped_sign = sign ^ 0x80000000;
  const int abs_bits = x_bits ^ sign;

  // Integer part and fractional part of |x|.
  const float abs_x = as_float(abs_bits);
  const int whole = (int)abs_x;
  const float frac = abs_x - whole;

  // Sign of x, flipped when the integer part of |x| is odd.
  const int odd_sign = sign ^ (whole & 0x1 ? 0x80000000 : 0);

  // 0.75 < frac < 1: tan(pi*frac) = -tan(pi*(1 - frac)).
  float arg = 1.0f - frac;
  int use_recip = 0;
  int out_sign = flipped_sign;

  if (frac <= 0.75f) {
    // 0.5 < frac <= 0.75: tan(pi*frac) = -1 / tan(pi*(frac - 0.5)).
    arg = frac - 0.5f;
    use_recip = 1;
    out_sign = sign;
  }
  if (frac < 0.5f) {
    // 0.25 < frac < 0.5: tan(pi*frac) = 1 / tan(pi*(0.5 - frac)).
    arg = 0.5f - frac;
    out_sign = flipped_sign;
  }
  if (frac <= 0.25f) {
    // frac <= 0.25: evaluate tan(pi*frac) directly.
    arg = frac;
    use_recip = 0;
    out_sign = sign;
  }

  const float tan_arg = __clc_tanf_piby4(arg * M_PI_F, 0);
  const float neg_recip = -native_recip(tan_arg);
  int reduced = out_sign ^ as_int(use_recip ? neg_recip : tan_arg);

  // Exactly half-way between two integers: signed infinity.
  if (frac == 0.5f) {
    reduced = odd_sign | 0x7f800000;
  }

  // Quiet NaN is the answer for +-Inf and NaN inputs.
  int result = 0x7fc00000;
  if (abs_bits < 0x7f800000) {
    // 2^24 <= |x| < Inf: x is an even integer, the result is a signed zero.
    result = sign;
  }
  if (abs_bits < 0x4b800000) {
    // 2^23 <= |x| < 2^24: x is an integer, zero sign depends on its parity.
    result = odd_sign;
  }
  if (abs_bits < 0x4b000000) {
    // |x| < 2^23: use the value computed from the folded fraction.
    result = reduced;
  }

  return as_float(result);
}
|
||||
_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tanpi, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#include "sincosD_piby4.h"
|
||||
|
||||
// Scalar double-precision tanpi: returns tan(pi * x).
//
// |x| is split into an integer part and a fraction in [0, 1). The fraction is
// folded into [0, 0.25] and __clc_tan_piby4 is evaluated there; one of the two
// components of its double2 result is then selected. Signs and special values
// are built directly as bit patterns.
_CLC_DEF _CLC_OVERLOAD double __clc_tanpi(double x) {
  // Bit-level pieces of the input: sign bit, inverted sign bit, bits of |x|.
  const long x_bits = as_long(x);
  const long sign = x_bits & 0x8000000000000000L;
  const long flipped_sign = sign ^ 0x8000000000000000L;
  const long abs_bits = x_bits ^ sign;

  // Integer part and fractional part of |x|.
  const double abs_x = as_double(abs_bits);
  const long whole = (long)abs_x;
  const double frac = abs_x - whole;

  // Sign of x, flipped when the integer part of |x| is odd.
  const long odd_sign = sign ^ (whole & 0x1 ? 0x8000000000000000L : 0L);

  // 0.75 < frac < 1: tan(pi*frac) = -tan(pi*(1 - frac)).
  double arg = 1.0 - frac;
  int use_hi = 0;
  long out_sign = flipped_sign;

  if (frac <= 0.75) {
    // 0.5 < frac <= 0.75: tan(pi*frac) = -1 / tan(pi*(frac - 0.5)).
    arg = frac - 0.5;
    use_hi = 1;
    out_sign = sign;
  }
  if (frac < 0.5) {
    // 0.25 < frac < 0.5: tan(pi*frac) = 1 / tan(pi*(0.5 - frac)).
    arg = 0.5 - frac;
    out_sign = flipped_sign;
  }
  if (frac <= 0.25) {
    // frac <= 0.25: evaluate tan(pi*frac) directly.
    arg = frac;
    use_hi = 0;
    out_sign = sign;
  }

  // .lo is used as the tangent, .hi as its negated reciprocal.
  const double scaled = arg * M_PI;
  const double2 tan_pair = __clc_tan_piby4(scaled, 0.0);
  long reduced = out_sign ^ as_long(use_hi ? tan_pair.hi : tan_pair.lo);

  // Exactly half-way between two integers: signed infinity.
  const long signed_inf = odd_sign | 0x7ff0000000000000L;
  if (frac == 0.5) {
    reduced = signed_inf;
  }

  // Quiet NaN is the answer for +-Inf and NaN inputs.
  long result = 0x7ff8000000000000L;
  if (abs_bits < 0x7ff0000000000000L) {
    // 2^53 <= |x| < Inf: x is an even integer, the result is a signed zero.
    result = sign;
  }
  if (abs_bits < 0x4340000000000000L) {
    // 2^52 <= |x| < 2^53: x is an integer, zero sign depends on its parity.
    result = odd_sign;
  }
  if (abs_bits < 0x4330000000000000L) {
    // |x| < 2^52: use the value computed from the folded fraction.
    result = reduced;
  }

  return as_double(result);
}
|
||||
_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tanpi, double);
|
||||
#endif
|
@ -7,124 +7,9 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/clc_cospi.h>
|
||||
|
||||
#include "sincos_helpers.h"
|
||||
#include "sincospiF_piby4.h"
|
||||
#ifdef cl_khr_fp64
|
||||
#include "sincosD_piby4.h"
|
||||
#endif
|
||||
#define FUNCTION cospi
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
|
||||
// Scalar single-precision cospi: returns cos(pi * x).
//
// The sign of x is discarded (cosine is even). |x| is split into an integer
// part and a fraction in [0, 1); the fraction is folded into [0, 0.25] and
// either component of __libclc__sincosf_piby4 is selected. The result sign
// and the special values are built directly as bit patterns.
_CLC_OVERLOAD _CLC_DEF float cospi(float x) {
  // Bits of |x|, its integer part and its fractional part.
  const int abs_bits = as_int(x) & 0x7fffffff;
  const float abs_x = as_float(abs_bits);
  const int whole = (int)abs_x;
  const float frac = abs_x - whole;

  // Sign bit set when the integer part of |x| is odd.
  const int odd_sign = whole & 0x1 ? 0x80000000 : 0;

  // 0.75 < frac < 1: cos(pi*frac) = -cos(pi*(1 - frac)).
  float arg = 1.0f - frac;
  int use_hi = 1;
  int out_sign = odd_sign ^ 0x80000000;

  if (frac <= 0.75f) {
    // 0.5 <= frac <= 0.75: cos(pi*frac) = -sin(pi*(frac - 0.5)).
    arg = frac - 0.5f;
    use_hi = 0;
  }
  if (frac < 0.5f) {
    // 0.25 < frac < 0.5: cos(pi*frac) = sin(pi*(0.5 - frac)).
    arg = 0.5f - frac;
    out_sign = odd_sign;
  }
  if (frac <= 0.25f) {
    // frac <= 0.25: evaluate cos(pi*frac) directly.
    arg = frac;
    use_hi = 1;
  }

  // .hi is used as the cosine, .lo as the sine.
  const float2 sin_cos = __libclc__sincosf_piby4(arg * M_PI_F);
  const int reduced = out_sign ^ as_int(use_hi ? sin_cos.hi : sin_cos.lo);

  // Quiet NaN is the answer for +-Inf and NaN inputs.
  int result = 0x7fc00000;
  if (abs_bits < 0x7f800000) {
    // 2^24 <= |x| < Inf: x is an even integer, the result is 1.0.
    result = 0x3f800000;
  }
  if (abs_bits < 0x4b800000) {
    // 2^23 <= |x| < 2^24: x is an integer, the result is +-1.0 by parity.
    result = odd_sign | 0x3f800000;
  }
  if (abs_bits < 0x4b000000) {
    // |x| < 2^23: use the value computed from the folded fraction.
    result = reduced;
  }

  return as_float(result);
}
|
||||
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Scalar double-precision cospi: returns cos(pi * x).
//
// The sign of x is discarded (cosine is even). |x| is split into an integer
// part and a fraction in [0, 1); the fraction is folded into [0, 0.25] and
// either component of __libclc__sincos_piby4 is selected. The result sign
// and the special values are built directly as bit patterns.
_CLC_OVERLOAD _CLC_DEF double cospi(double x) {
  // Bits of |x|, its integer part and its fractional part.
  const long abs_bits = as_long(x) & 0x7fffffffffffffffL;
  const double abs_x = as_double(abs_bits);
  const long whole = (long)abs_x;
  const double frac = abs_x - (double)whole;

  // Sign bit set when the integer part of |x| is odd.
  const long odd_sign = whole & 0x1L ? 0x8000000000000000L : 0L;

  // 0.75 < frac < 1: cos(pi*frac) = -cos(pi*(1 - frac)).
  double arg = 1.0 - frac;
  int use_hi = 1;
  long out_sign = odd_sign ^ 0x8000000000000000L;

  if (frac <= 0.75) {
    // 0.5 <= frac <= 0.75: cos(pi*frac) = -sin(pi*(frac - 0.5)).
    arg = frac - 0.5;
    use_hi = 0;
  }
  if (frac < 0.5) {
    // 0.25 < frac < 0.5: cos(pi*frac) = sin(pi*(0.5 - frac)).
    arg = 0.5 - frac;
    out_sign = odd_sign;
  }
  if (frac <= 0.25) {
    // frac <= 0.25: evaluate cos(pi*frac) directly.
    arg = frac;
    use_hi = 1;
  }

  // .hi is used as the cosine, .lo as the sine.
  const double2 sin_cos = __libclc__sincos_piby4(arg * M_PI, 0.0);
  const long reduced = out_sign ^ as_long(use_hi ? sin_cos.hi : sin_cos.lo);

  // Quiet NaN is the answer for +-Inf and NaN inputs.
  long result = 0x7ff8000000000000L;
  if (abs_bits < 0x7ff0000000000000) {
    // 2^53 <= |x| < Inf: x is an even integer, the result is 1.0.
    result = 0x3ff0000000000000L;
  }
  if (abs_x < 0x1.0p+53) {
    // 2^52 <= |x| < 2^53: x is an integer, the result is +-1.0 by parity.
    result = odd_sign | 0x3ff0000000000000L;
  }
  if (abs_x < 0x1.0p+52) {
    // |x| < 2^52: use the value computed from the folded fraction.
    result = reduced;
  }

  return as_double(result);
}
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi)
|
||||
|
||||
#endif
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -7,7 +7,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/math/clc_native_divide.h>
|
||||
|
||||
#define __CLC_BODY <native_divide.inc>
|
||||
#define __FLOAT_ONLY
|
||||
#define FUNCTION native_divide
|
||||
#define __CLC_BODY <clc/shared/binary_def.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -7,7 +7,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/math/clc_native_recip.h>
|
||||
|
||||
#define __CLC_BODY <native_recip.inc>
|
||||
#define __FLOAT_ONLY
|
||||
#define FUNCTION native_recip
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -1,119 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Evaluates sin and cos of the argument x + xx, where xx is the tail (low
// part) of x. Returns the sine in .lo and the cosine in .hi.
// NOTE(review): per its name this is meant for arguments already reduced to
// [-pi/4, pi/4]; the range is not checked here.
//
// Sine: sin(x) = x * f(w) with w = x*x and
//   f(w) = 1 - w/3! + w^2/5! - w^3/7! ...
// A minimax approximation of (f(w) - 1) / w is used, giving an expansion in
// even powers of x. For a non-zero tail the correction (1 - x*x/2) * xx is
// added; it approximates cos(x)*sin(xx), valid because xx is tiny relative
// to x.
//
// Cosine: cos(x) = f(w) with w = x*x and
//   f(w) = 1 - w/2! + w^2/4! - w^3/6! ...
// A minimax approximation of (f(w) - 1 + w/2) / (w*w) is used, again an
// expansion in even powers of x. For a non-zero tail the correction x * xx is
// subtracted; it approximates sin(x)*sin(xx).
_CLC_INLINE double2 __libclc__sincos_piby4(double x, double xx) {
  // Minimax coefficients for the sine polynomial.
  const double sin_c1 = -0.166666666666666646259241729;
  const double sin_c2 = 0.833333333333095043065222816e-2;
  const double sin_c3 = -0.19841269836761125688538679e-3;
  const double sin_c4 = 0.275573161037288022676895908448e-5;
  const double sin_c5 = -0.25051132068021699772257377197e-7;
  const double sin_c6 = 0.159181443044859136852668200e-9;

  // Minimax coefficients for the cosine polynomial.
  const double cos_c1 = 0.41666666666666665390037e-1;
  const double cos_c2 = -0.13888888888887398280412e-2;
  const double cos_c3 = 0.248015872987670414957399e-4;
  const double cos_c4 = -0.275573172723441909470836e-6;
  const double cos_c5 = 0.208761463822329611076335e-8;
  const double cos_c6 = -0.113826398067944859590880e-10;

  const double x2 = x * x;
  const double x3 = x2 * x;
  const double half_x2 = 0.5 * x2;
  const double one_minus_half_x2 = 1.0 - half_x2;

  // Horner evaluation of the sine polynomial (terms sin_c6 .. sin_c2).
  double sin_poly = fma(sin_c6, x2, sin_c5);
  sin_poly = fma(sin_poly, x2, sin_c4);
  sin_poly = fma(sin_poly, x2, sin_c3);
  sin_poly = fma(sin_poly, x2, sin_c2);

  // Horner evaluation of the cosine polynomial (terms cos_c6 .. cos_c1).
  double cos_poly = fma(cos_c6, x2, cos_c5);
  cos_poly = fma(cos_poly, x2, cos_c4);
  cos_poly = fma(cos_poly, x2, cos_c3);
  cos_poly = fma(cos_poly, x2, cos_c2);
  cos_poly = fma(cos_poly, x2, cos_c1);

  // Tail term of the cosine: x*xx plus the rounding error of 1 - x^2/2.
  const double cos_tail = fma(x, xx, (1.0 - one_minus_half_x2) - half_x2);
  const double cos_val = one_minus_half_x2 + fma(cos_poly, x2 * x2, cos_tail);

  // Sine correction, including the contribution of the tail xx.
  double sin_corr = fma(-x3, sin_poly, 0.5 * xx);
  sin_corr = fma(sin_corr, x2, -xx);
  const double sin_val = x - fma(-x3, sin_c1, sin_corr);

  double2 sin_cos;
  sin_cos.lo = sin_val;
  sin_cos.hi = cos_val;
  return sin_cos;
}
|
||||
|
||||
// Evaluates the tangent of the argument x + xx, where xx is the tail (low
// part) of x. Returns tan in .lo and -1/tan in .hi.
// NOTE(review): per its name this is meant for arguments already reduced to
// [-pi/4, pi/4]; the range is not checked here.
//
// To keep relative precision near the ends of the interval, arguments with
// |x| > 0.68 are transformed with
//   tan(pi/4 - x) = (1 - tan(x)) / (1 + tan(x))    close to  pi/4, and
//   tan(x - pi/4) = (tan(x) - 1) / (tan(x) + 1)    close to -pi/4.
_CLC_INLINE double2 __clc_tan_piby4(double x, double xx) {
  const double piby4_lead = 7.85398163397448278999e-01; // 0x3fe921fb54442d18
  const double piby4_tail = 3.06161699786838240164e-17; // 0x3c81a62633145c06

  // Decide whether the argument must be moved away from +-pi/4.
  const int near_pos = x > 0.68;
  const int near_neg = x < -0.68;
  const int shifted = near_pos | near_neg;

  double transform = 0.0;
  if (near_pos) {
    transform = 1.0;
  }
  if (near_neg) {
    transform = -1.0;
  }

  // pi/4 - transform * (x + xx), computed from lead and tail parts.
  const double moved =
      fma(-transform, x, piby4_lead) + fma(-transform, xx, piby4_tail);
  if (shifted) {
    x = moved;
    xx = 0.0;
  }

  // Core Remez [2,3] approximation to tan(x + xx) on the interval [0, 0.68].
  const double r = fma(2.0, x * xx, x * x);

  double num = fma(r, 0.224044448537022097264602535574e-3,
                   -0.229345080057565662883358588111e-1);
  num = fma(r, num, 0.372379159759792203640806338901e0);

  double den = fma(r, -0.232371494088563558304549252913e-3,
                   0.260656620398645407524064091208e-1);
  den = fma(r, den, -0.515658515729031149329237816945e0);
  den = fma(r, den, 0.111713747927937668539901657944e1);

  const double corr = fma(MATH_DIVIDE(num, den), x * r, xx);
  const double tan_sum = x + corr;

  // Compute -1.0 / (x + corr) accurately: mask off the low 32 bits of the
  // sum and of the first reciprocal estimate, then refine with fma.
  const double sum_top = as_double(as_long(tan_sum) & 0xffffffff00000000L);
  const double sum_rest = corr - (sum_top - x);
  const double neg_rec = -MATH_RECIP(tan_sum);
  const double neg_rec_top =
      as_double(as_long(neg_rec) & 0xffffffff00000000L);

  double refine = fma(neg_rec_top, sum_top, 1.0);
  refine = fma(neg_rec_top, sum_rest, refine);
  const double neg_rec_exact = fma(refine, neg_rec, neg_rec_top);

  // Undo the pi/4 shift for the tangent and for its negated reciprocal.
  const double tan_back =
      transform * (1.0 - MATH_DIVIDE(2.0 * tan_sum, 1.0 + tan_sum));
  const double neg_rec_back =
      transform * (MATH_DIVIDE(2.0 * tan_sum, tan_sum - 1.0) - 1.0);

  double2 tan_pair;
  if (shifted) {
    tan_pair.lo = tan_back;
    tan_pair.hi = neg_rec_back;
  } else {
    tan_pair.lo = tan_sum;
    tan_pair.hi = neg_rec_exact;
  }
  return tan_pair;
}
|
@ -17,31 +17,13 @@
|
||||
#include <clc/math/tables.h>
|
||||
#include <clc/shared/clc_max.h>
|
||||
|
||||
#define bytealign(src0, src1, src2) \
|
||||
((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8)))
|
||||
|
||||
// Single-precision tangent kernel for an argument reduced to [0, pi/4].
//
// Returns tan(x) when bit 0 of regn is clear and -1/tan(x) when it is set.
_CLC_DEF float __clc_tanf_piby4(float x, int regn) {
  // Core Remez [1,2] approximation to tan(x) on the interval [0, pi/4]:
  //   tan(x) ~= x + x^3 * num(x^2) / den(x^2).
  const float x_sq = x * x;

  const float num = __clc_mad(x_sq, -0.0172032480471481694693109f,
                              0.385296071263995406715129f);

  const float den = __clc_mad(
      x_sq,
      __clc_mad(x_sq, 0.01844239256901656082986661f,
                -0.51396505478854532132342f),
      1.15588821434688393452299f);

  const float tan_x = __clc_mad(x * x_sq, native_divide(num, den), x);

  if (regn & 1) {
    return -MATH_RECIP(tan_x);
  }
  return tan_x;
}
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
#define bytealign(src0, src1, src2) \
|
||||
((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8)))
|
||||
|
||||
// Reduction for medium sized arguments
|
||||
_CLC_DEF void __clc_remainder_piby2_medium(double x, private double *r,
|
||||
private double *rr,
|
||||
|
@ -9,8 +9,6 @@
|
||||
#include <clc/clcfunc.h>
|
||||
#include <clc/clctypes.h>
|
||||
|
||||
_CLC_DECL float __clc_tanf_piby4(float x, int y);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
@ -1,46 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/math/clc_mad.h>
|
||||
|
||||
// Evaluates the single-precision sin and cos of a value in the interval
// [-pi/4, pi/4]. The sine is returned in .x and the cosine in .y.
//
// Sine: sin(x) = x * f(w) with w = x*x and
//   f(w) = 1 - w/3! + w^2/5! - w^3/7! ...
// A minimax approximation of (f(w) - 1) / w is used because it produces an
// expansion in even powers of x.
//
// Cosine: cos(x) = f(w) with w = x*x and
//   f(w) = 1 - w/2! + w^2/4! - w^3/6! ...
// A minimax approximation of (f(w) - 1 + w/2) / (w*w) is used, for the same
// reason.
_CLC_INLINE float2 __libclc__sincosf_piby4(float x) {
  // Minimax coefficients for the sine polynomial.
  const float sin_c1 = -0.166666666638608441788607926e0F;
  const float sin_c2 = 0.833333187633086262120839299e-2F;
  const float sin_c3 = -0.198400874359527693921333720e-3F;
  const float sin_c4 = 0.272500015145584081596826911e-5F;

  // Minimax coefficients for the cosine polynomial.
  const float cos_c1 = 0.41666666664325175238031e-1F;
  const float cos_c2 = -0.13888887673175665567647e-2F;
  const float cos_c3 = 0.24800600878112441958053e-4F;
  const float cos_c4 = -0.27301013343179832472841e-6F;

  const float x_sq = x * x;

  float2 sin_cos;

  // sin(x) ~= x + x^3 * P(x^2)
  sin_cos.x = __clc_mad(
      x * x_sq,
      __clc_mad(x_sq,
                __clc_mad(x_sq, __clc_mad(x_sq, sin_c4, sin_c3), sin_c2),
                sin_c1),
      x);

  // cos(x) ~= (1 - x^2/2) + x^4 * Q(x^2)
  sin_cos.y = __clc_mad(
      x_sq * x_sq,
      __clc_mad(x_sq,
                __clc_mad(x_sq, __clc_mad(x_sq, cos_c4, cos_c3), cos_c2),
                cos_c1),
      __clc_mad(x_sq, -0.5f, 1.0f));

  return sin_cos;
}
|
@ -7,119 +7,9 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/clc_sinpi.h>
|
||||
|
||||
#include "sincospiF_piby4.h"
|
||||
#ifdef cl_khr_fp64
|
||||
#include "sincosD_piby4.h"
|
||||
#endif
|
||||
#define FUNCTION sinpi
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
|
||||
// Scalar single-precision sinpi: returns sin(pi * x).
//
// |x| is split into an integer part and a fraction in [0, 1); the fraction is
// folded into [0, 0.25] and either component of __libclc__sincosf_piby4 is
// selected. The result sign and the special values are built directly as bit
// patterns.
_CLC_OVERLOAD _CLC_DEF float sinpi(float x) {
  // Sign bit of x and bits of |x|.
  const int x_bits = as_int(x);
  const int sign = x_bits & 0x80000000;
  const int abs_bits = x_bits ^ sign;

  // Integer part and fractional part of |x|.
  const float abs_x = as_float(abs_bits);
  const int whole = (int)abs_x;
  const float frac = abs_x - whole;

  // Sign of x, flipped when the integer part of |x| is odd.
  const int odd_sign = sign ^ (whole & 0x1 ? 0x80000000 : 0);

  // 0.75 < frac < 1: sin(pi*frac) = sin(pi*(1 - frac)).
  float arg = 1.0f - frac;
  int use_hi = 0;

  if (frac <= 0.75f) {
    // 0.5 <= frac <= 0.75: sin(pi*frac) = cos(pi*(frac - 0.5)).
    arg = frac - 0.5f;
    use_hi = 1;
  }
  if (frac < 0.5f) {
    // 0.25 < frac < 0.5: sin(pi*frac) = cos(pi*(0.5 - frac)).
    arg = 0.5f - frac;
  }
  if (frac <= 0.25f) {
    // frac <= 0.25: evaluate sin(pi*frac) directly.
    arg = frac;
    use_hi = 0;
  }

  // .hi is used as the cosine, .lo as the sine.
  const float2 sin_cos = __libclc__sincosf_piby4(arg * M_PI_F);
  const int reduced = odd_sign ^ as_int(use_hi ? sin_cos.hi : sin_cos.lo);

  // Quiet NaN is the answer for +-Inf and NaN inputs.
  int result = 0x7fc00000;
  if (abs_bits < 0x7f800000) {
    // 2^23 <= |x| < Inf: x is an integer, the result is a signed zero.
    result = sign;
  }
  if (abs_bits < 0x4b000000) {
    // |x| < 2^23: use the value computed from the folded fraction.
    result = reduced;
  }

  return as_float(result);
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinpi, float);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Scalar double-precision sinpi: returns sin(pi * x).
//
// |x| is split into an integer part and a fraction in [0, 1); the fraction is
// folded into [0, 0.25] and either component of __libclc__sincos_piby4 is
// selected. The result sign and the special values are built directly as bit
// patterns.
_CLC_OVERLOAD _CLC_DEF double sinpi(double x) {
  // Sign bit of x and bits of |x|.
  const long x_bits = as_long(x);
  const long sign = x_bits & 0x8000000000000000L;
  const long abs_bits = x_bits ^ sign;

  // Integer part and fractional part of |x|.
  const double abs_x = as_double(abs_bits);
  const long whole = (long)abs_x;
  const double frac = abs_x - (double)whole;

  // Sign of x, flipped when the integer part of |x| is odd.
  const long odd_sign = sign ^ (whole & 0x1L ? 0x8000000000000000L : 0L);

  // 0.75 < frac < 1: sin(pi*frac) = sin(pi*(1 - frac)).
  double arg = 1.0 - frac;
  int use_hi = 0;

  if (frac <= 0.75) {
    // 0.5 <= frac <= 0.75: sin(pi*frac) = cos(pi*(frac - 0.5)).
    arg = frac - 0.5;
    use_hi = 1;
  }
  if (frac < 0.5) {
    // 0.25 < frac < 0.5: sin(pi*frac) = cos(pi*(0.5 - frac)).
    arg = 0.5 - frac;
  }
  if (frac <= 0.25) {
    // frac <= 0.25: evaluate sin(pi*frac) directly.
    arg = frac;
    use_hi = 0;
  }

  // .hi is used as the cosine, .lo as the sine.
  const double scaled = arg * M_PI;
  const double2 sin_cos = __libclc__sincos_piby4(scaled, 0.0);
  const long reduced = odd_sign ^ as_long(use_hi ? sin_cos.hi : sin_cos.lo);

  // Quiet NaN is the answer for +-Inf and NaN inputs.
  long result = 0x7ff8000000000000L;
  if (abs_bits < 0x7ff0000000000000) {
    // 2^52 <= |x| < Inf: x is an integer, the result is a signed zero.
    result = sign;
  }
  if (abs_x < 0x1.0p+52) {
    // |x| < 2^52: use the value computed from the folded fraction.
    result = reduced;
  }

  return as_double(result);
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi)
|
||||
|
||||
#endif
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -7,9 +7,9 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/math/clc_tanpi.h>
|
||||
|
||||
#include <math/clc_tanpi.h>
|
||||
#define FUNCTION tanpi
|
||||
#define __CLC_BODY <clc/shared/unary_def.inc>
|
||||
|
||||
#define __CLC_FUNC tanpi
|
||||
#define __CLC_BODY <clc_sw_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -72,7 +72,6 @@ math/fma.cl
|
||||
../../generic/lib/math/clc_tan.cl
|
||||
../../generic/lib/math/tan.cl
|
||||
../../generic/lib/math/tanh.cl
|
||||
../../generic/lib/math/clc_tanpi.cl
|
||||
../../generic/lib/math/tanpi.cl
|
||||
../../generic/lib/math/tgamma.cl
|
||||
../../generic/lib/shared/vload.cl
|
||||
|
Loading…
x
Reference in New Issue
Block a user