[libclc] Move sinh, cosh & tanh to the CLC library (#134063)

This commit also vectorizes the builtins.
2025-04-15 22:06:32 +00:00 · 2025-04-02 15:22:42 +01:00 · 2025-04-02 15:22:42 +01:00 · f186041553
commit f186041553
parent d51525ba36
16 changed files with 786 additions and 611 deletions
--- a/libclc/clc/include/clc/math/clc_cosh.h
+++ b/libclc/clc/include/clc/math/clc_cosh.h
@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_COSH_H__
+#define __CLC_MATH_CLC_COSH_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_cosh
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_COSH_H__
--- a/libclc/clc/include/clc/math/clc_sinh.h
+++ b/libclc/clc/include/clc/math/clc_sinh.h
@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_SINH_H__
+#define __CLC_MATH_CLC_SINH_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_sinh
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SINH_H__
--- a/libclc/clc/include/clc/math/clc_tanh.h
+++ b/libclc/clc/include/clc/math/clc_tanh.h
@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_TANH_H__
+#define __CLC_MATH_CLC_TANH_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_tanh
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_TANH_H__
--- a/libclc/clc/include/clc/math/tables.h
+++ b/libclc/clc/include/clc/math/tables.h
@ -62,7 +62,6 @@
 TABLE_FUNCTION_DECL(float2, log2_tbl);
 TABLE_FUNCTION_DECL(float2, log10_tbl);
 TABLE_FUNCTION_DECL(uint4, pibits_tbl);
-TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);

 CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
 CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
@ -74,6 +73,8 @@ CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_head);
 CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_tail);
 CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
 CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
+CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head);
+CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);

 #ifdef cl_khr_fp64

@ -85,8 +86,10 @@ CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
 CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
 CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_head);
 CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_tail);
-TABLE_FUNCTION_DECL(double2, sinh_tbl);
-TABLE_FUNCTION_DECL(double2, cosh_tbl);
+CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_head);
+CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_tail);
+CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_head);
+CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_tail);
 CLC_TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
 CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_head);
 CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_tail);
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@ -31,6 +31,7 @@ math/clc_atanpi.cl
 math/clc_cbrt.cl
 math/clc_ceil.cl
 math/clc_copysign.cl
+math/clc_cosh.cl
 math/clc_cospi.cl
 math/clc_ep_log.cl
 math/clc_exp.cl
@ -76,10 +77,12 @@ math/clc_rootn.cl
 math/clc_round.cl
 math/clc_rsqrt.cl
 math/clc_sincos_helpers.cl
+math/clc_sinh.cl
 math/clc_sinpi.cl
 math/clc_sqrt.cl
 math/clc_sw_fma.cl
 math/clc_tables.cl
+math/clc_tanh.cl
 math/clc_tanpi.cl
 math/clc_tgamma.cl
 math/clc_trunc.cl
--- a/libclc/clc/lib/generic/math/clc_cosh.cl
+++ b/libclc/clc/lib/generic/math/clc_cosh.cl
@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_copysign.h>
+#include <clc/math/clc_exp.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_cosh.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_cosh.inc
+++ b/libclc/clc/lib/generic/math/clc_cosh.inc
@ -0,0 +1,199 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+// Single-precision cosh. cosh is even, so the computation runs on y = |x|
+// and the result never takes the sign of x.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
+  // After dealing with special cases the computation is split into regions as
+  // follows:
+  //
+  // abs(x) >= max_cosh_arg:
+  //   cosh(x) = +Inf
+  //
+  // abs(x) >= small_threshold:
+  //   exp(-abs(x)) is insignificant next to exp(abs(x)), so
+  //   cosh(x) = exp(abs(x))/2
+  //
+  // abs(x) < small_threshold:
+  //   cosh(x) is rebuilt from tabulated sinh/cosh at the integer part of
+  //   abs(x) and short polynomials in the remainder (see below).
+
+  const __CLC_GENTYPE max_cosh_arg = 0x1.65a9fap+6f;
+  const __CLC_GENTYPE small_threshold = 0x1.0a2b24p+3f;
+
+  __CLC_GENTYPE y = __clc_fabs(x);
+  __CLC_UINTN aux = __CLC_AS_UINTN(y);
+
+  // Find the integer part y0 of y and the increment dy = y - y0. We then
+  // compute
+  //   z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
+  // where sinh(y0) and cosh(y0) are read from sinhcosh_tbl_head and
+  // sinhcosh_tbl_tail respectively.
+
+  __CLC_INTN ind = __CLC_CONVERT_INTN(y);
+  // Anything outside the table's index range 0..36 falls back to entry 0;
+  // every such input is overwritten by one of the selects further down.
+  ind = __CLC_CONVERT_UINTN(ind) > 36U ? 0 : ind;
+
+  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+  __CLC_GENTYPE dy2 = dy * dy;
+
+  // sdy approximates sinh(dy) as dy + dy^3 * P(dy^2).
+  __CLC_GENTYPE sdy = __clc_mad(
+      dy2,
+      __clc_mad(
+          dy2,
+          __clc_mad(
+              dy2,
+              __clc_mad(
+                  dy2,
+                  __clc_mad(dy2,
+                            __clc_mad(dy2, 0.7746188980094184251527126e-12f,
+                                      0.160576793121939886190847e-9f),
+                            0.250521176994133472333666e-7f),
+                  0.275573191913636406057211e-5f),
+              0.198412698413242405162014e-3f),
+          0.833333333333329931873097e-2f),
+      0.166666666666666667013899e0f);
+  sdy = __clc_mad(sdy, dy * dy2, dy);
+
+  // cdy approximates cosh(dy) as 1 + dy^2 * Q(dy^2).
+  __CLC_GENTYPE cdy = __clc_mad(
+      dy2,
+      __clc_mad(
+          dy2,
+          __clc_mad(
+              dy2,
+              __clc_mad(
+                  dy2,
+                  __clc_mad(dy2,
+                            __clc_mad(dy2, 0.1163921388172173692062032e-10f,
+                                      0.208744349831471353536305e-8f),
+                            0.275573350756016588011357e-6f),
+                  0.248015872460622433115785e-4f),
+              0.138888888889814854814536e-2f),
+          0.416666666666660876512776e-1f),
+      0.500000000000000005911074e0f);
+  cdy = __clc_mad(cdy, dy2, 1.0f);
+
+  // Despite the names, the "head" table holds sinh(ind) and the "tail" table
+  // holds cosh(ind).
+  __CLC_GENTYPE sinhcoshh = USE_TABLE(sinhcosh_tbl_head, ind);
+  __CLC_GENTYPE sinhcosht = USE_TABLE(sinhcosh_tbl_tail, ind);
+  __CLC_GENTYPE z = __clc_mad(sinhcoshh, sdy, sinhcosht * cdy);
+
+  // When exp(-x) is insignificant compared to exp(x), return exp(x)/2.
+  // The subtracted constant is slightly above ln(2); the mad below appears to
+  // compensate for that difference.
+  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e500p-1f);
+  __CLC_GENTYPE zsmall = __clc_mad(0x1.a0210ep-18f, t, t);
+  z = y >= small_threshold ? zsmall : z;
+
+  // Corner cases, applied in order: overflow -> +Inf, NaN input -> NaN,
+  // abs(x) < 2^-14 -> exactly 1.
+  z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_UINTN)PINFBITPATT_SP32) : z;
+  z = aux > PINFBITPATT_SP32 ? __CLC_GENTYPE_NAN : z;
+  z = aux < 0x38800000 ? 1.0f : z;
+
+  return z;
+}
+
+#elif __CLC_FPSIZE == 64
+
+// Double-precision cosh. cosh is even, so the computation runs on y = |x|
+// and the result never takes the sign of x.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
+  // After dealing with special cases the computation is split into
+  // regions as follows:
+  //
+  // abs(x) >= max_cosh_arg:
+  // cosh(x) = +Inf
+  //
+  // abs(x) >= small_threshold:
+  // cosh(x) = exp(abs(x))/2
+  //
+  // abs(x) < small_threshold:
+  // cosh(x) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy), with y0 the integer
+  // part of abs(x) and dy the remainder.
+
+  // This is ln(2^1025) = 0x408633ce8fb9f87e
+  const __CLC_GENTYPE max_cosh_arg = 7.10475860073943977113e+02;
+
+  // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
+  const __CLC_GENTYPE small_threshold = 0x1.2b708872320e2p+4;
+
+  __CLC_GENTYPE y = __clc_fabs(x);
+
+  // In this range we find the integer part y0 of y
+  // and the increment dy = y - y0. We then compute
+  // z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
+  // where sinh(y0) and cosh(y0) are obtained from tables.
+
+  // Clamp y *before* the float->int conversion: the tables have 37 entries
+  // (indices 0..36) and converting NaN, Inf or values beyond the int range is
+  // not well defined, so clamping afterwards could still leave a bad index.
+  // NaN compares false and therefore also maps to 36; all inputs clamped here
+  // are replaced by the special-case selects at the end.
+  __CLC_GENTYPE y_clamped = y < 36.0 ? y : 36.0;
+  __CLC_INTN ind = __CLC_CONVERT_INTN(y_clamped);
+  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+  __CLC_GENTYPE dy2 = dy * dy;
+
+  __CLC_GENTYPE sdy =
+      dy * dy2 *
+      __clc_fma(
+          dy2,
+          __clc_fma(
+              dy2,
+              __clc_fma(
+                  dy2,
+                  __clc_fma(
+                      dy2,
+                      __clc_fma(dy2,
+                                __clc_fma(dy2, 0.7746188980094184251527126e-12,
+                                          0.160576793121939886190847e-9),
+                                0.250521176994133472333666e-7),
+                      0.275573191913636406057211e-5),
+                  0.198412698413242405162014e-3),
+              0.833333333333329931873097e-2),
+          0.166666666666666667013899e0);
+
+  __CLC_GENTYPE cdy =
+      dy2 *
+      __clc_fma(
+          dy2,
+          __clc_fma(
+              dy2,
+              __clc_fma(
+                  dy2,
+                  __clc_fma(
+                      dy2,
+                      __clc_fma(dy2,
+                                __clc_fma(dy2, 0.1163921388172173692062032e-10,
+                                          0.208744349831471353536305e-8),
+                                0.275573350756016588011357e-6),
+                      0.248015872460622433115785e-4),
+                  0.138888888889814854814536e-2),
+              0.416666666666660876512776e-1),
+          0.500000000000000005911074e0);
+
+  // At this point sinh(dy) is approximated by dy + sdy,
+  // and cosh(dy) is approximated by 1 + cdy.
+  // cl/ct and sl/st are the head/tail parts of cosh(y0) and sinh(y0).
+  __CLC_GENTYPE cl = USE_TABLE(cosh_tbl_head, ind);
+  __CLC_GENTYPE ct = USE_TABLE(cosh_tbl_tail, ind);
+  __CLC_GENTYPE sl = USE_TABLE(sinh_tbl_head, ind);
+  __CLC_GENTYPE st = USE_TABLE(sinh_tbl_tail, ind);
+
+  // Accumulate the cross terms starting from the tail products and add the
+  // cosh head (cl) last.
+  __CLC_GENTYPE z =
+      __clc_fma(
+          sl, dy,
+          __clc_fma(sl, sdy,
+                    __clc_fma(cl, cdy,
+                              __clc_fma(st, dy, __clc_fma(st, sdy, ct * cdy)) +
+                                  ct))) +
+      cl;
+
+  // Other cases
+  // abs(x) < 2^-28: the result is exactly 1.
+  z = y < 0x1.0p-28 ? 1.0 : z;
+
+  // Large arguments: cosh(x) = exp(abs(x))/2.
+  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e42fefa3800p-1);
+  t = __clc_fma(t, -0x1.ef35793c76641p-45, t);
+  z = y >= small_threshold ? t : z;
+
+  // Overflow: +Inf.
+  z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;
+
+  // Inf and NaN inputs return abs(x), i.e. +Inf or NaN.
+  z = __clc_isinf(x) || __clc_isnan(x) ? y : z;
+
+  return z;
+}
+
+#elif __CLC_FPSIZE == 16
+
+// 16-bit variant: converts x with __CLC_CONVERT_FLOATN, calls __clc_cosh on
+// the converted value and converts the result back to __CLC_GENTYPE.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(__clc_cosh(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
--- a/libclc/clc/lib/generic/math/clc_sinh.cl
+++ b/libclc/clc/lib/generic/math/clc_sinh.cl
@ -0,0 +1,23 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_copysign.h>
+#include <clc/math/clc_exp.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_sinh.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_sinh.inc
+++ b/libclc/clc/lib/generic/math/clc_sinh.inc
@ -0,0 +1,201 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+// Single-precision sinh. sinh is odd: the computation runs on y = |x| and the
+// sign bit of x (xs) is OR-ed back into the result.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
+  // After dealing with special cases the computation is split into regions as
+  // follows:
+  //
+  // abs(x) >= max_sinh_arg:
+  //   sinh(x) = sign(x)*Inf
+  //
+  // abs(x) >= small_threshold:
+  //   sinh(x) = sign(x)*exp(abs(x))/2
+  //
+  // abs(x) < small_threshold:
+  //   sinh(x) = sign(x)*(sinh(y0)cosh(dy) + cosh(y0)sinh(dy)), with y0 the
+  //   integer part of abs(x) and dy the remainder.
+
+  const __CLC_GENTYPE max_sinh_arg = 0x1.65a9fap+6f;
+  const __CLC_GENTYPE small_threshold = 0x1.0a2b24p+3f;
+
+  __CLC_UINTN ux = __CLC_AS_UINTN(x);
+  __CLC_GENTYPE y = __clc_fabs(x);
+  __CLC_UINTN aux = __CLC_AS_UINTN(y);
+  // Bits of x that differ from the bits of abs(x): only the sign bit.
+  __CLC_UINTN xs = ux ^ aux;
+
+  // We find the integer part y0 of y and the increment dy = y - y0. We then
+  // compute z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) where sinh(y0)
+  // and cosh(y0) are read from sinhcosh_tbl_head and sinhcosh_tbl_tail
+  // respectively.
+  __CLC_INTN ind = __CLC_CONVERT_INTN(y);
+  // Anything outside the table's index range 0..36 falls back to entry 0;
+  // every such input is overwritten by one of the selects further down.
+  ind = __CLC_CONVERT_UINTN(ind) > 36U ? 0 : ind;
+
+  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+  __CLC_GENTYPE dy2 = dy * dy;
+
+  // sdy approximates sinh(dy) as dy + dy^3 * P(dy^2).
+  __CLC_GENTYPE sdy = __clc_mad(
+      dy2,
+      __clc_mad(
+          dy2,
+          __clc_mad(
+              dy2,
+              __clc_mad(
+                  dy2,
+                  __clc_mad(dy2,
+                            __clc_mad(dy2, 0.7746188980094184251527126e-12f,
+                                      0.160576793121939886190847e-9f),
+                            0.250521176994133472333666e-7f),
+                  0.275573191913636406057211e-5f),
+              0.198412698413242405162014e-3f),
+          0.833333333333329931873097e-2f),
+      0.166666666666666667013899e0f);
+  sdy = __clc_mad(sdy, dy * dy2, dy);
+
+  // cdy approximates cosh(dy) as 1 + dy^2 * Q(dy^2).
+  __CLC_GENTYPE cdy = __clc_mad(
+      dy2,
+      __clc_mad(
+          dy2,
+          __clc_mad(
+              dy2,
+              __clc_mad(
+                  dy2,
+                  __clc_mad(dy2,
+                            __clc_mad(dy2, 0.1163921388172173692062032e-10f,
+                                      0.208744349831471353536305e-8f),
+                            0.275573350756016588011357e-6f),
+                  0.248015872460622433115785e-4f),
+              0.138888888889814854814536e-2f),
+          0.416666666666660876512776e-1f),
+      0.500000000000000005911074e0f);
+  cdy = __clc_mad(cdy, dy2, 1.0f);
+
+  // Despite the names, the "head" table holds sinh(ind) and the "tail" table
+  // holds cosh(ind).
+  __CLC_GENTYPE sinhcoshh = USE_TABLE(sinhcosh_tbl_head, ind);
+  __CLC_GENTYPE sinhcosht = USE_TABLE(sinhcosh_tbl_tail, ind);
+  __CLC_GENTYPE z = __clc_mad(sinhcosht, sdy, sinhcoshh * cdy);
+  // Re-apply the sign of x.
+  z = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(z));
+
+  // When y is large enough that the negative exponential is negligible,
+  // sinh(y) is approximated by sign(x)*exp(y)/2.
+  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e500p-1f);
+  __CLC_GENTYPE zsmall = __clc_mad(0x1.a0210ep-18f, t, t);
+  zsmall = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(zsmall));
+  z = y >= small_threshold ? zsmall : z;
+
+  // Corner cases: overflow gives sign(x)*Inf; NaN inputs and abs(x) < 2^-14
+  // return x itself.
+  __CLC_GENTYPE zinf = __CLC_AS_GENTYPE(PINFBITPATT_SP32 | xs);
+  z = y >= max_sinh_arg ? zinf : z;
+  z = aux > PINFBITPATT_SP32 || aux < 0x38800000U ? x : z;
+
+  return z;
+}
+
+#elif __CLC_FPSIZE == 64
+
+// Double-precision sinh. sinh is odd: the computation runs on y = |x| and the
+// sign of x is restored by the final __clc_copysign.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
+  // After dealing with special cases the computation is split into
+  // regions as follows:
+  //
+  // abs(x) >= max_sinh_arg:
+  // sinh(x) = sign(x)*Inf
+  //
+  // abs(x) >= small_threshold:
+  // sinh(x) = sign(x)*exp(abs(x))/2
+  //
+  // abs(x) < small_threshold:
+  // sinh(x) = sign(x)*(sinh(y0)cosh(dy) + cosh(y0)sinh(dy)), with y0 the
+  // integer part of abs(x) and dy the remainder.
+
+  // 0x408633ce8fb9f87e
+  const __CLC_GENTYPE max_sinh_arg = 7.10475860073943977113e+02;
+
+  // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
+  const __CLC_GENTYPE small_threshold = 0x1.2b708872320e2p+4;
+
+  __CLC_GENTYPE y = __clc_fabs(x);
+
+  // In this range we find the integer part y0 of y
+  // and the increment dy = y - y0. We then compute
+  // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
+  // where sinh(y0) and cosh(y0) are obtained from tables
+
+  // Clamp y *before* the float->int conversion: the tables have 37 entries
+  // (indices 0..36) and converting NaN, Inf or values beyond the int range is
+  // not well defined, so clamping afterwards could still leave a bad index.
+  // NaN compares false and therefore also maps to 36; all inputs clamped here
+  // are replaced by the special-case selects at the end.
+  __CLC_GENTYPE y_clamped = y < 36.0 ? y : 36.0;
+  __CLC_INTN ind = __CLC_CONVERT_INTN(y_clamped);
+  __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+  __CLC_GENTYPE dy2 = dy * dy;
+
+  __CLC_GENTYPE sdy =
+      dy * dy2 *
+      __clc_fma(
+          dy2,
+          __clc_fma(
+              dy2,
+              __clc_fma(
+                  dy2,
+                  __clc_fma(
+                      dy2,
+                      __clc_fma(dy2,
+                                __clc_fma(dy2, 0.7746188980094184251527126e-12,
+                                          0.160576793121939886190847e-9),
+                                0.250521176994133472333666e-7),
+                      0.275573191913636406057211e-5),
+                  0.198412698413242405162014e-3),
+              0.833333333333329931873097e-2),
+          0.166666666666666667013899e0);
+
+  // cosh(dy) is approximated by 1 + cdy.
+  __CLC_GENTYPE cdy =
+      dy2 *
+      __clc_fma(
+          dy2,
+          __clc_fma(
+              dy2,
+              __clc_fma(
+                  dy2,
+                  __clc_fma(
+                      dy2,
+                      __clc_fma(dy2,
+                                __clc_fma(dy2, 0.1163921388172173692062032e-10,
+                                          0.208744349831471353536305e-8),
+                                0.275573350756016588011357e-6),
+                      0.248015872460622433115785e-4),
+                  0.138888888889814854814536e-2),
+              0.416666666666660876512776e-1),
+          0.500000000000000005911074e0);
+
+  // At this point sinh(dy) is approximated by dy + sdy.
+  // Shift some significant bits from dy to sdy: sdy1 is dy with its low 27
+  // bits cleared, and sdy2 collects sdy plus what was cut off.
+  __CLC_GENTYPE sdy1 =
+      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(dy) & 0xfffffffff8000000UL);
+  __CLC_GENTYPE sdy2 = sdy + (dy - sdy1);
+
+  // cl/ct and sl/st are the head/tail parts of cosh(y0) and sinh(y0).
+  __CLC_GENTYPE cl = USE_TABLE(cosh_tbl_head, ind);
+  __CLC_GENTYPE ct = USE_TABLE(cosh_tbl_tail, ind);
+  __CLC_GENTYPE sl = USE_TABLE(sinh_tbl_head, ind);
+  __CLC_GENTYPE st = USE_TABLE(sinh_tbl_tail, ind);
+
+  // Accumulate the cross terms starting from the tail products and add the
+  // sinh head (sl) last.
+  __CLC_GENTYPE z =
+      __clc_fma(cl, sdy1,
+                __clc_fma(sl, cdy,
+                          __clc_fma(cl, sdy2,
+                                    __clc_fma(ct, sdy1,
+                                              __clc_fma(st, cdy, ct * sdy2)) +
+                                        st))) +
+      sl;
+
+  // Other cases
+  // abs(x) < 2^-28, NaN and Inf all return abs(x); the sign is restored by
+  // the copysign below.
+  z = (y < 0x1.0p-28) || __clc_isnan(x) || __clc_isinf(x) ? y : z;
+
+  // Large arguments: sinh(abs(x)) = exp(abs(x))/2; overflow gives Inf.
+  __CLC_GENTYPE t = __clc_exp(y - 0x1.62e42fefa3800p-1);
+  t = __clc_fma(t, -0x1.ef35793c76641p-45, t);
+  z = y >= small_threshold ? t : z;
+  z = y >= max_sinh_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;
+
+  return __clc_copysign(z, x);
+}
+
+#elif __CLC_FPSIZE == 16
+
+// 16-bit variant: converts x with __CLC_CONVERT_FLOATN, calls __clc_sinh on
+// the converted value and converts the result back to __CLC_GENTYPE.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(__clc_sinh(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
--- a/libclc/clc/lib/generic/math/clc_tables.cl
+++ b/libclc/clc/lib/generic/math/clc_tables.cl
@ -339,6 +339,37 @@ DECLARE_TABLE(float, CBRT_TBL_TAIL, 129) = {

 CLC_TABLE_FUNCTION(float, CBRT_TBL_TAIL, cbrt_tbl_tail);

+// Tabulated values of sinh(i) and cosh(i) for i = 0,...,36: despite the names,
+// SINHCOSH_TBL_HEAD holds sinh(i) and SINHCOSH_TBL_TAIL holds cosh(i).
+DECLARE_TABLE(float, SINHCOSH_TBL_HEAD, 37) = {
+    0x0.000000p+0f,  0x1.2cd9fcp+0f,  0x1.d03cf6p+1f,  0x1.40926ep+3f,
+    0x1.b4a380p+4f,  0x1.28d016p+6f,  0x1.936d22p+7f,  0x1.122876p+9f,
+    0x1.749ea6p+10f, 0x1.fa7158p+11f, 0x1.5829dcp+13f, 0x1.d3c448p+14f,
+    0x1.3de166p+16f, 0x1.b00b5ap+17f, 0x1.259ac4p+19f, 0x1.8f0ccap+20f,
+    0x1.0f2ebep+22f, 0x1.709348p+23f, 0x1.f4f220p+24f, 0x1.546d90p+26f,
+    0x1.ceb088p+27f, 0x1.3a6e20p+29f, 0x1.ab5adcp+30f, 0x1.226af4p+32f,
+    0x1.8ab7fcp+33f, 0x1.0c3d3ap+35f, 0x1.6c9326p+36f, 0x1.ef8230p+37f,
+    0x1.50bba4p+39f, 0x1.c9aae4p+40f, 0x1.370470p+42f, 0x1.a6b766p+43f,
+    0x1.1f43fcp+45f, 0x1.866f34p+46f, 0x1.0953e2p+48f, 0x1.689e22p+49f,
+    0x1.ea215ap+50f,
+};
+
+CLC_TABLE_FUNCTION(float, SINHCOSH_TBL_HEAD, sinhcosh_tbl_head);
+
+DECLARE_TABLE(float, SINHCOSH_TBL_TAIL, 37) = {
+    0x1.000000p+0f,  0x1.8b0756p+0f,  0x1.e18fa0p+1f,  0x1.422a4ap+3f,
+    0x1.b4ee86p+4f,  0x1.28d6fcp+6f,  0x1.936e68p+7f,  0x1.122894p+9f,
+    0x1.749eaap+10f, 0x1.fa7158p+11f, 0x1.5829dep+13f, 0x1.d3c448p+14f,
+    0x1.3de166p+16f, 0x1.b00b5ap+17f, 0x1.259ac4p+19f, 0x1.8f0ccap+20f,
+    0x1.0f2ebep+22f, 0x1.709348p+23f, 0x1.f4f220p+24f, 0x1.546d90p+26f,
+    0x1.ceb088p+27f, 0x1.3a6e20p+29f, 0x1.ab5adcp+30f, 0x1.226af4p+32f,
+    0x1.8ab7fcp+33f, 0x1.0c3d3ap+35f, 0x1.6c9326p+36f, 0x1.ef8230p+37f,
+    0x1.50bba4p+39f, 0x1.c9aae4p+40f, 0x1.370470p+42f, 0x1.a6b766p+43f,
+    0x1.1f43fcp+45f, 0x1.866f34p+46f, 0x1.0953e2p+48f, 0x1.689e22p+49f,
+    0x1.ea215ap+50f,
+};
+
+CLC_TABLE_FUNCTION(float, SINHCOSH_TBL_TAIL, sinhcosh_tbl_tail);
+
 #ifdef cl_khr_fp64

 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
@ -1279,4 +1310,73 @@ DECLARE_TABLE(double, CBRT_REM_TBL_TAIL, 5) = {

 CLC_TABLE_FUNCTION(double, CBRT_REM_TBL_TAIL, cbrt_rem_tbl_tail);

+DECLARE_TABLE(double, SINH_TBL_HEAD, 37) = {
+    0x0.0000000000000p+0,  0x1.2cd9fc0000000p+0,  0x1.d03cf60000000p+1,
+    0x1.40926e0000000p+3,  0x1.b4a3800000000p+4,  0x1.28d0160000000p+6,
+    0x1.936d228000000p+7,  0x1.1228768000000p+9,  0x1.749ea50000000p+10,
+    0x1.fa71570000000p+11, 0x1.5829dc8000000p+13, 0x1.d3c4488000000p+14,
+    0x1.3de1650000000p+16, 0x1.b00b590000000p+17, 0x1.259ac48000000p+19,
+    0x1.8f0cca8000000p+20, 0x1.0f2ebd0000000p+22, 0x1.7093488000000p+23,
+    0x1.f4f2208000000p+24, 0x1.546d8f8000000p+26, 0x1.ceb0888000000p+27,
+    0x1.3a6e1f8000000p+29, 0x1.ab5adb8000000p+30, 0x1.226af30000000p+32,
+    0x1.8ab7fb0000000p+33, 0x1.0c3d390000000p+35, 0x1.6c93268000000p+36,
+    0x1.ef822f0000000p+37, 0x1.50bba30000000p+39, 0x1.c9aae40000000p+40,
+    0x1.3704708000000p+42, 0x1.a6b7658000000p+43, 0x1.1f43fc8000000p+45,
+    0x1.866f348000000p+46, 0x1.0953e28000000p+48, 0x1.689e220000000p+49,
+    0x1.ea215a0000000p+50,
+};
+
+DECLARE_TABLE(double, SINH_TBL_TAIL, 37) = {
+    0x0.0000000000000p+0,  0x1.13ae6096a0092p-26, 0x1.db70cfb79a640p-26,
+    0x1.c2526b66dc067p-23, 0x1.b81b18647f380p-23, 0x1.bc1cdd1e1eb08p-20,
+    0x1.d9f201534fb09p-19, 0x1.d1c064a4e9954p-18, 0x1.4eca65d06ea74p-18,
+    0x1.0c259bcc0ecc5p-15, 0x1.b5a6647cf9016p-13, 0x1.9691adefb0870p-15,
+    0x1.3410fc29cde38p-10, 0x1.6a31a50b6fb3cp-11, 0x1.7defc71805c40p-10,
+    0x1.eb49fd80e0babp-6,  0x1.4fffc7bcd5920p-7,  0x1.03a93b6c63435p-3,
+    0x1.1940bb255fd1cp-4,  0x1.ed26e14260b50p-2,  0x1.b47401fc9f2a2p+0,
+    0x1.67bb3f55634f1p+3,  0x1.c435ff8194ddcp+2,  0x1.d8fee052ba63ap+5,
+    0x1.51d7edccde3f6p+7,  0x1.04b1644557d1ap+8,  0x1.6a6b5ca0a9dc4p+8,
+    0x1.fd9cc72249abap+11, 0x1.e58de693edab5p+13, 0x1.8c70158ac6363p+14,
+    0x1.7614764f43e20p+15, 0x1.6337db36fc718p+17, 0x1.12d98b1f611e2p+19,
+    0x1.392bc108b37ccp+19, 0x1.ce87bdc3473dcp+22, 0x1.bc8d5ae99ad14p+21,
+    0x1.d20d76744835cp+22,
+};
+
+DECLARE_TABLE(double, COSH_TBL_HEAD, 37) = {
+    0x1.0000000000000p+0,  0x1.8b07550000000p+0,  0x1.e18fa08000000p+1,
+    0x1.422a490000000p+3,  0x1.b4ee858000000p+4,  0x1.28d6fc8000000p+6,
+    0x1.936e678000000p+7,  0x1.1228948000000p+9,  0x1.749eaa8000000p+10,
+    0x1.fa71580000000p+11, 0x1.5829dd0000000p+13, 0x1.d3c4488000000p+14,
+    0x1.3de1650000000p+16, 0x1.b00b590000000p+17, 0x1.259ac48000000p+19,
+    0x1.8f0cca8000000p+20, 0x1.0f2ebd0000000p+22, 0x1.7093488000000p+23,
+    0x1.f4f2208000000p+24, 0x1.546d8f8000000p+26, 0x1.ceb0888000000p+27,
+    0x1.3a6e1f8000000p+29, 0x1.ab5adb8000000p+30, 0x1.226af30000000p+32,
+    0x1.8ab7fb0000000p+33, 0x1.0c3d390000000p+35, 0x1.6c93268000000p+36,
+    0x1.ef822f0000000p+37, 0x1.50bba30000000p+39, 0x1.c9aae40000000p+40,
+    0x1.3704708000000p+42, 0x1.a6b7658000000p+43, 0x1.1f43fc8000000p+45,
+    0x1.866f348000000p+46, 0x1.0953e28000000p+48, 0x1.689e220000000p+49,
+    0x1.ea215a0000000p+50,
+};
+
+DECLARE_TABLE(double, COSH_TBL_TAIL, 37) = {
+    0x0.0000000000000p+0,  0x1.d9f5504c2bd28p-28, 0x1.7cb66f0a4c9fdp-25,
+    0x1.f58617928e588p-23, 0x1.bc7d000c38d48p-25, 0x1.f7f9d4e329998p-21,
+    0x1.6e6e464885269p-19, 0x1.ba3a8b946c154p-19, 0x1.3f4e76110d5a4p-18,
+    0x1.17622515a3e2bp-15, 0x1.4dc4b528af3d0p-17, 0x1.1156278615e10p-14,
+    0x1.35ad50ed821f5p-10, 0x1.6b61055f2935cp-11, 0x1.7e2794a601240p-10,
+    0x1.eb4b45f6aadd3p-6,  0x1.5000b967b3698p-7,  0x1.03a940fadc092p-3,
+    0x1.1940bf3bf874cp-4,  0x1.ed26e1a2a2110p-2,  0x1.b4740205796d6p+0,
+    0x1.67bb3f55cb85dp+3,  0x1.c435ff81e18acp+2,  0x1.d8fee052bdea4p+5,
+    0x1.51d7edccde926p+7,  0x1.04b1644557e0ep+8,  0x1.6a6b5ca0a9e1cp+8,
+    0x1.fd9cc72249abep+11, 0x1.e58de693edab5p+13, 0x1.8c70158ac6364p+14,
+    0x1.7614764f43e20p+15, 0x1.6337db36fc718p+17, 0x1.12d98b1f611e2p+19,
+    0x1.392bc108b37ccp+19, 0x1.ce87bdc3473dcp+22, 0x1.bc8d5ae99ad14p+21,
+    0x1.d20d76744835cp+22,
+};
+
+CLC_TABLE_FUNCTION(double, SINH_TBL_HEAD, sinh_tbl_head);
+CLC_TABLE_FUNCTION(double, SINH_TBL_TAIL, sinh_tbl_tail);
+CLC_TABLE_FUNCTION(double, COSH_TBL_HEAD, cosh_tbl_head);
+CLC_TABLE_FUNCTION(double, COSH_TBL_TAIL, cosh_tbl_tail);
+
 #endif // cl_khr_fp64
--- a/libclc/clc/lib/generic/math/clc_tanh.cl
+++ b/libclc/clc/lib/generic/math/clc_tanh.cl
@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_exp.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_tanh.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_tanh.inc
+++ b/libclc/clc/lib/generic/math/clc_tanh.inc
@ -0,0 +1,137 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+// Single-precision tanh. tanh is odd: the computation runs on y = |x| and the
+// sign bit of x (xs) is OR-ed back into the result.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
+  // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
+  // to the following three formulae:
+  // 1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
+  // 2.  (1 - (2/(exp(2*x) + 1 )))
+  // 3.  (exp(2*x) - 1)/(exp(2*x) + 1)
+  // but computationally, some formulae are better on some ranges.
+
+  // Above this magnitude the result is returned as exactly +/-1.
+  const __CLC_GENTYPE large_threshold = 0x1.0a2b24p+3f;
+
+  __CLC_UINTN ux = __CLC_AS_UINTN(x);
+  __CLC_UINTN aux = ux & EXSIGNBIT_SP32;
+  // Bits of x that are not in aux, re-applied to the result below.
+  __CLC_UINTN xs = ux ^ aux;
+
+  __CLC_GENTYPE y = __CLC_AS_GENTYPE(aux);
+  __CLC_GENTYPE y2 = y * y;
+
+  // Numerator (a1) and denominator (b1) polynomials in y^2, used for y < 0.9.
+  __CLC_GENTYPE a1 = __clc_mad(
+      y2, __clc_mad(y2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F),
+      -0.28192806108402678e0F);
+  __CLC_GENTYPE b1 =
+      __clc_mad(y2, 0.3427017942262751343e0F, 0.845784192581041099e0F);
+
+  // Numerator (a2) and denominator (b2) polynomials, used for y >= 0.9.
+  __CLC_GENTYPE a2 = __clc_mad(
+      y2, __clc_mad(y2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F),
+      -0.24069858695196524e0F);
+  __CLC_GENTYPE b2 =
+      __clc_mad(y2, 0.292529068698052819e0F, 0.72209738473684982e0F);
+
+  __CLC_INTN c = y < 0.9f;
+  __CLC_GENTYPE a = c ? a1 : a2;
+  __CLC_GENTYPE b = c ? b1 : b2;
+  // Small-argument result: y + y^3 * (a / b).
+  __CLC_GENTYPE zlo = __clc_mad(MATH_DIVIDE(a, b), y * y2, y);
+
+  // Larger arguments use formula 2: 1 - 2/(exp(2*y) + 1).
+  __CLC_GENTYPE p = __clc_exp(2.0f * y) + 1.0f;
+  __CLC_GENTYPE zhi = 1.0F - MATH_DIVIDE(2.0F, p);
+
+  __CLC_GENTYPE z = y <= 1.0f ? zlo : zhi;
+  z = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(z));
+
+  // Edge cases: beyond large_threshold return +/-1 (0x3f800000 is 1.0f);
+  // abs(x) < 2^-13 and NaN inputs return x itself.
+  __CLC_GENTYPE sone = __CLC_AS_GENTYPE(0x3f800000U | xs);
+  z = y > large_threshold ? sone : z;
+  z = aux < 0x39000000 || aux > 0x7f800000 ? x : z;
+
+  return z;
+}
+
+#elif __CLC_FPSIZE == 64
+
+// Double-precision tanh. tanh is odd: the computation runs on y = |x| and the
+// sign bit of x (sx) is OR-ed into the returned bit pattern.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
+  // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
+  // to the following three formulae:
+  // 1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
+  // 2.  (1 - (2/(exp(2*x) + 1 )))
+  // 3.  (exp(2*x) - 1)/(exp(2*x) + 1)
+  // but computationally, some formulae are better on some ranges.
+
+  // The point at which e^-x is insignificant compared to e^x = ln(2^27)
+  const __CLC_GENTYPE large_threshold = 0x1.2b708872320e2p+4;
+
+  __CLC_ULONGN ux = __CLC_AS_ULONGN(x);
+  __CLC_ULONGN ax = ux & ~SIGNBIT_DP64;
+  // Bits of x that are not in ax, re-applied on return.
+  __CLC_ULONGN sx = ux ^ ax;
+  __CLC_GENTYPE y = __CLC_AS_GENTYPE(ax);
+  __CLC_GENTYPE y2 = y * y;
+
+  // y < 0.9
+  // znl / zdl: numerator and denominator polynomials in y^2.
+  __CLC_GENTYPE znl =
+      __clc_fma(y2,
+                __clc_fma(y2,
+                          __clc_fma(y2, -0.142077926378834722618091e-7,
+                                    -0.200047621071909498730453e-3),
+                          -0.176016349003044679402273e-1),
+                -0.274030424656179760118928e0);
+
+  __CLC_GENTYPE zdl =
+      __clc_fma(y2,
+                __clc_fma(y2,
+                          __clc_fma(y2, 0.2091140262529164482568557e-3,
+                                    0.201562166026937652780575e-1),
+                          0.381641414288328849317962e0),
+                0.822091273968539282568011e0);
+
+  // 0.9 <= y <= 1
+  // znm / zdm: numerator and denominator polynomials in y^2.
+  __CLC_GENTYPE znm =
+      __clc_fma(y2,
+                __clc_fma(y2,
+                          __clc_fma(y2, -0.115475878996143396378318e-7,
+                                    -0.165597043903549960486816e-3),
+                          -0.146173047288731678404066e-1),
+                -0.227793870659088295252442e0);
+
+  __CLC_GENTYPE zdm =
+      __clc_fma(y2,
+                __clc_fma(y2,
+                          __clc_fma(y2, 0.173076050126225961768710e-3,
+                                    0.167358775461896562588695e-1),
+                          0.317204558977294374244770e0),
+                0.683381611977295894959554e0);
+
+  __CLC_LONGN c = y < 0.9;
+  __CLC_GENTYPE zn = c ? znl : znm;
+  __CLC_GENTYPE zd = c ? zdl : zdm;
+  // Small-argument result: y + y^3 * (zn / zd).
+  __CLC_GENTYPE z = y + y * y2 * MATH_DIVIDE(zn, zd);
+
+  // y > 1
+  // Formula 2: 1 - 2/(exp(2*y) + 1).
+  __CLC_GENTYPE p = __clc_exp(2.0 * y) + 1.0;
+  __CLC_GENTYPE zg = 1.0 - 2.0 / p;
+
+  z = y > 1.0 ? zg : z;
+
+  // Other cases
+  // abs(x) < 2^-28 and NaN inputs return x itself.
+  z = y < 0x1.0p-28 || ax > PINFBITPATT_DP64 ? x : z;
+
+  // Beyond large_threshold the magnitude is exactly 1.
+  z = y > large_threshold ? 1.0 : z;
+
+  return __CLC_AS_GENTYPE(sx | __CLC_AS_ULONGN(z));
+}
+
+#elif __CLC_FPSIZE == 16
+
+// 16-bit variant: converts x with __CLC_CONVERT_FLOATN, calls __clc_tanh on
+// the converted value and converts the result back to __CLC_GENTYPE.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(__clc_tanh(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
--- a/libclc/generic/lib/math/cosh.cl
+++ b/libclc/generic/lib/math/cosh.cl
@ -7,179 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
+#include <clc/math/clc_cosh.h>

-_CLC_OVERLOAD _CLC_DEF float cosh(float x) {
-
-    // After dealing with special cases the computation is split into regions as follows.
-    // abs(x) >= max_cosh_arg:
-    // cosh(x) = sign(x)*Inf
-    // abs(x) >= small_threshold:
-    // cosh(x) = sign(x)*exp(abs(x))/2 computed using the
-    // splitexp and scaleDouble functions as for exp_amd().
-    // abs(x) < small_threshold:
-    // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
-    // cosh(x) is then z.
-
-    const float max_cosh_arg = 0x1.65a9fap+6f;
-    const float small_threshold = 0x1.0a2b24p+3f;
-
-    uint ux = as_uint(x);
-    uint aux = ux & EXSIGNBIT_SP32;
-    float y = as_float(aux);
-
-    // Find the integer part y0 of y and the increment dy = y - y0. We then compute
-    // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
-    // z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
-    // where sinh(y0) and cosh(y0) are tabulated above.
-
-    int ind = (int)y;
-    ind = (uint)ind > 36U ? 0 : ind;
-
-    float dy = y - ind;
-    float dy2 = dy * dy;
-
-    float sdy = mad(dy2,
-                    mad(dy2,
-                        mad(dy2,
-                            mad(dy2,
-                                mad(dy2,
-                                    mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f),
-                                    0.250521176994133472333666e-7f),
-                                0.275573191913636406057211e-5f),
-                            0.198412698413242405162014e-3f),
-                        0.833333333333329931873097e-2f),
-                    0.166666666666666667013899e0f);
-    sdy = mad(sdy, dy*dy2, dy);
-
-    float cdy = mad(dy2,
-                    mad(dy2,
-                        mad(dy2,
-                            mad(dy2,
-                                mad(dy2,
-                                    mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f),
-                                    0.275573350756016588011357e-6f),
-                                0.248015872460622433115785e-4f),
-                            0.138888888889814854814536e-2f),
-                        0.416666666666660876512776e-1f),
-                    0.500000000000000005911074e0f);
-    cdy = mad(cdy, dy2, 1.0f);
-
-    float2 tv = USE_TABLE(sinhcosh_tbl, ind);
-    float z = mad(tv.s0, sdy, tv.s1 * cdy);
-
-    // When exp(-x) is insignificant compared to exp(x), return exp(x)/2
-    float t = exp(y - 0x1.62e500p-1f);
-    float zsmall = mad(0x1.a0210ep-18f, t, t);
-    z = y >= small_threshold ? zsmall : z;
-
-    // Corner cases
-    z = y >= max_cosh_arg ? as_float(PINFBITPATT_SP32) : z;
-    z = aux > PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : z;
-    z = aux < 0x38800000 ? 1.0f : z;
-
-    return z;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cosh, float);
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double cosh(double x) {
-
-    // After dealing with special cases the computation is split into
-    // regions as follows:
-    //
-    // abs(x) >= max_cosh_arg:
-    // cosh(x) = sign(x)*Inf
-    //
-    // abs(x) >= small_threshold:
-    // cosh(x) = sign(x)*exp(abs(x))/2 computed using the
-    // splitexp and scaleDouble functions as for exp_amd().
-    //
-    // abs(x) < small_threshold:
-    // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
-    // cosh(x) is then sign(x)*z.
-
-    // This is ln(2^1025)
-    const double max_cosh_arg = 7.10475860073943977113e+02;      // 0x408633ce8fb9f87e
-
-    // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
-    const double small_threshold = 0x1.2b708872320e2p+4;
-
-    double y = fabs(x);
-
-    // In this range we find the integer part y0 of y 
-    // and the increment dy = y - y0. We then compute
-    // z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
-    // where sinh(y0) and cosh(y0) are tabulated above.
-
-    int ind = min((int)y, 36);
-    double dy = y - ind;
-    double dy2 = dy * dy;
-
-    double sdy = dy * dy2 *
-	         fma(dy2,
-		     fma(dy2,
-			 fma(dy2,
-			     fma(dy2,
-				 fma(dy2,
-				     fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),
-				     0.250521176994133472333666e-7),
-				 0.275573191913636406057211e-5),
-			     0.198412698413242405162014e-3),
-			 0.833333333333329931873097e-2),
-		     0.166666666666666667013899e0);
-
-    double cdy = dy2 * fma(dy2,
-	                   fma(dy2,
-			       fma(dy2,
-				   fma(dy2,
-				       fma(dy2,
-					   fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),
-					   0.275573350756016588011357e-6),
-				       0.248015872460622433115785e-4),
-				   0.138888888889814854814536e-2),
-			       0.416666666666660876512776e-1),
-			   0.500000000000000005911074e0);
-
-    // At this point sinh(dy) is approximated by dy + sdy,
-    // and cosh(dy) is approximated by 1 + cdy.
-    double2 tv = USE_TABLE(cosh_tbl, ind);
-    double cl = tv.s0;
-    double ct = tv.s1;
-    tv = USE_TABLE(sinh_tbl, ind);
-    double sl = tv.s0;
-    double st = tv.s1;
-
-    double z = fma(sl, dy, fma(sl, sdy, fma(cl, cdy, fma(st, dy, fma(st, sdy, ct*cdy)) + ct))) + cl;
-
-    // Other cases
-    z = y < 0x1.0p-28 ? 1.0 : z;
-
-    double t = exp(y - 0x1.62e42fefa3800p-1);
-    t =  fma(t, -0x1.ef35793c76641p-45, t);
-    z = y >= small_threshold ? t : z;
-
-    z = y >= max_cosh_arg ? as_double(PINFBITPATT_DP64) : z;
-
-    z = isinf(x) | isnan(x) ? y : z;
-
-    return z;
-
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh)
-
-#endif
+#define FUNCTION cosh
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/generic/lib/math/sinh.cl
+++ b/libclc/generic/lib/math/sinh.cl
@ -7,178 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
+#include <clc/math/clc_sinh.h>

-_CLC_OVERLOAD _CLC_DEF float sinh(float x)
-{
-    // After dealing with special cases the computation is split into regions as follows.
-    // abs(x) >= max_sinh_arg:
-    // sinh(x) = sign(x)*Inf
-    // abs(x) >= small_threshold:
-    // sinh(x) = sign(x)*exp(abs(x))/2 computed using the splitexp and scaleDouble functions as for exp_amd().
-    // abs(x) < small_threshold:
-    // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
-    // sinh(x) is then sign(x)*z.
-
-    const float max_sinh_arg = 0x1.65a9fap+6f;
-    const float small_threshold = 0x1.0a2b24p+3f;
-
-    uint ux = as_uint(x);
-    uint aux = ux & EXSIGNBIT_SP32;
-    uint xs = ux ^ aux;
-    float y = as_float(aux);
-
-    // We find the integer part y0 of y and the increment dy = y - y0. We then compute
-    // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
-    // where sinh(y0) and cosh(y0) are tabulated above.
-    int ind = (int) y;
-    ind = (uint)ind > 36U ? 0 : ind;
-
-    float dy = y - ind;
-    float dy2 = dy * dy;
-
-    float sdy = mad(dy2,
-                    mad(dy2,
-                        mad(dy2,
-                            mad(dy2,
-                                mad(dy2,
-                                    mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f),
-                                    0.250521176994133472333666e-7f),
-                                0.275573191913636406057211e-5f),
-                            0.198412698413242405162014e-3f),
-                         0.833333333333329931873097e-2f),
-                    0.166666666666666667013899e0f);
-    sdy = mad(sdy, dy*dy2, dy);
-
-    float cdy = mad(dy2,
-                    mad(dy2,
-                        mad(dy2,
-                            mad(dy2,
-                                mad(dy2,
-                                    mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f),
-                                    0.275573350756016588011357e-6f),
-                                0.248015872460622433115785e-4f),
-                            0.138888888889814854814536e-2f),
-                        0.416666666666660876512776e-1f),
-                    0.500000000000000005911074e0f);
-    cdy = mad(cdy, dy2, 1.0f);
-
-    float2 tv = USE_TABLE(sinhcosh_tbl, ind);
-    float z = mad(tv.s1, sdy, tv.s0 * cdy);
-    z = as_float(xs | as_uint(z));
-
-    // When y is large enough so that the negative exponential is negligible,
-    // so sinh(y) is approximated by sign(x)*exp(y)/2.
-    float t = exp(y - 0x1.62e500p-1f);
-    float zsmall = mad(0x1.a0210ep-18f, t, t);
-    zsmall = as_float(xs | as_uint(zsmall));
-    z = y >= small_threshold ? zsmall : z;
-
-    // Corner cases
-    float zinf = as_float(PINFBITPATT_SP32 | xs);
-    z = y >= max_sinh_arg ? zinf : z;
-    z = aux > PINFBITPATT_SP32 | aux < 0x38800000U ? x : z;
-
-    return z;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinh, float);
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double sinh(double x)
-{
-    // After dealing with special cases the computation is split into
-    // regions as follows:
-    //
-    // abs(x) >= max_sinh_arg:
-    // sinh(x) = sign(x)*Inf
-    //
-    // abs(x) >= small_threshold:
-    // sinh(x) = sign(x)*exp(abs(x))/2 computed using the
-    // splitexp and scaleDouble functions as for exp_amd().
-    //
-    // abs(x) < small_threshold:
-    // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
-    // sinh(x) is then sign(x)*z.
-
-    const double max_sinh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e
-
-    // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
-    const double small_threshold = 0x1.2b708872320e2p+4;
-
-    double y = fabs(x);
-
-    // In this range we find the integer part y0 of y
-    // and the increment dy = y - y0. We then compute
-    // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
-    // where sinh(y0) and cosh(y0) are obtained from tables
-
-    int ind = min((int)y, 36);
-    double dy = y - ind;
-    double dy2 = dy * dy;
-
-    double sdy = dy * dy2 *
-	         fma(dy2,
-		     fma(dy2,
-			 fma(dy2,
-			     fma(dy2,
-				 fma(dy2,
-				     fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),
-				     0.250521176994133472333666e-7),
-				 0.275573191913636406057211e-5),
-			     0.198412698413242405162014e-3),
-			 0.833333333333329931873097e-2),
-		     0.166666666666666667013899e0);
-
-    double cdy = dy2 * fma(dy2,
-	                   fma(dy2,
-			       fma(dy2,
-				   fma(dy2,
-				       fma(dy2,
-					   fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),
-					   0.275573350756016588011357e-6),
-				       0.248015872460622433115785e-4),
-				   0.138888888889814854814536e-2),
-			       0.416666666666660876512776e-1),
-			   0.500000000000000005911074e0);
-
-    // At this point sinh(dy) is approximated by dy + sdy.
-    // Shift some significant bits from dy to sdy.
-    double sdy1 = as_double(as_ulong(dy) & 0xfffffffff8000000UL);
-    double sdy2 = sdy + (dy - sdy1);
-
-    double2 tv = USE_TABLE(cosh_tbl, ind);
-    double cl = tv.s0;
-    double ct = tv.s1;
-    tv = USE_TABLE(sinh_tbl, ind);
-    double sl = tv.s0;
-    double st = tv.s1;
-
-    double z = fma(cl, sdy1, fma(sl, cdy, fma(cl, sdy2, fma(ct, sdy1, fma(st, cdy, ct*sdy2)) + st))) + sl;
-
-    // Other cases
-    z = (y < 0x1.0p-28) | isnan(x) | isinf(x) ? y : z;
-
-    double t = exp(y - 0x1.62e42fefa3800p-1);
-    t = fma(t, -0x1.ef35793c76641p-45, t);
-    z = y >= small_threshold ? t : z;
-    z = y >= max_sinh_arg ? as_double(PINFBITPATT_DP64) : z;
-
-    return copysign(z, x);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh)
-
-#endif
+#define FUNCTION sinh
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/generic/lib/math/tables.cl
+++ b/libclc/generic/lib/math/tables.cl
@ -289,139 +289,9 @@ DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
    230, 139, 2, 0, 0, 0, 0, 0, 0, 0
 };

-// Tabulated values of sinh(i) and cosh(i) for i = 0,...,36.
-DECLARE_TABLE(float2, SINHCOSH_TBL, 37) = {
-    (float2)(0x0.000000p+0f, 0x1.000000p+0f),
-    (float2)(0x1.2cd9fcp+0f, 0x1.8b0756p+0f),
-    (float2)(0x1.d03cf6p+1f, 0x1.e18fa0p+1f),
-    (float2)(0x1.40926ep+3f, 0x1.422a4ap+3f),
-    (float2)(0x1.b4a380p+4f, 0x1.b4ee86p+4f),
-    (float2)(0x1.28d016p+6f, 0x1.28d6fcp+6f),
-    (float2)(0x1.936d22p+7f, 0x1.936e68p+7f),
-    (float2)(0x1.122876p+9f, 0x1.122894p+9f),
-    (float2)(0x1.749ea6p+10f, 0x1.749eaap+10f),
-    (float2)(0x1.fa7158p+11f, 0x1.fa7158p+11f),
-    (float2)(0x1.5829dcp+13f, 0x1.5829dep+13f),
-    (float2)(0x1.d3c448p+14f, 0x1.d3c448p+14f),
-    (float2)(0x1.3de166p+16f, 0x1.3de166p+16f),
-    (float2)(0x1.b00b5ap+17f, 0x1.b00b5ap+17f),
-    (float2)(0x1.259ac4p+19f, 0x1.259ac4p+19f),
-    (float2)(0x1.8f0ccap+20f, 0x1.8f0ccap+20f),
-    (float2)(0x1.0f2ebep+22f, 0x1.0f2ebep+22f),
-    (float2)(0x1.709348p+23f, 0x1.709348p+23f),
-    (float2)(0x1.f4f220p+24f, 0x1.f4f220p+24f),
-    (float2)(0x1.546d90p+26f, 0x1.546d90p+26f),
-    (float2)(0x1.ceb088p+27f, 0x1.ceb088p+27f),
-    (float2)(0x1.3a6e20p+29f, 0x1.3a6e20p+29f),
-    (float2)(0x1.ab5adcp+30f, 0x1.ab5adcp+30f),
-    (float2)(0x1.226af4p+32f, 0x1.226af4p+32f),
-    (float2)(0x1.8ab7fcp+33f, 0x1.8ab7fcp+33f),
-    (float2)(0x1.0c3d3ap+35f, 0x1.0c3d3ap+35f),
-    (float2)(0x1.6c9326p+36f, 0x1.6c9326p+36f),
-    (float2)(0x1.ef8230p+37f, 0x1.ef8230p+37f),
-    (float2)(0x1.50bba4p+39f, 0x1.50bba4p+39f),
-    (float2)(0x1.c9aae4p+40f, 0x1.c9aae4p+40f),
-    (float2)(0x1.370470p+42f, 0x1.370470p+42f),
-    (float2)(0x1.a6b766p+43f, 0x1.a6b766p+43f),
-    (float2)(0x1.1f43fcp+45f, 0x1.1f43fcp+45f),
-    (float2)(0x1.866f34p+46f, 0x1.866f34p+46f),
-    (float2)(0x1.0953e2p+48f, 0x1.0953e2p+48f),
-    (float2)(0x1.689e22p+49f, 0x1.689e22p+49f),
-    (float2)(0x1.ea215ap+50f, 0x1.ea215ap+50f)
-};
-
 TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
 TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);

 uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) { // accessor for PIBITS_TBL; returns one uint4 by value
    return *(__constant uint4 *)(PIBITS_TBL + idx); // idx is added before the cast, so it counts PIBITS_TBL elements (declared uchar in the hunk header above), not uint4s
 }
-
-TABLE_FUNCTION(float2, SINHCOSH_TBL, sinhcosh_tbl);
-
-#ifdef cl_khr_fp64
-
-DECLARE_TABLE(double2, SINH_TBL, 37) = {
-    (double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0),
-    (double2)(0x1.2cd9fc0000000p+0, 0x1.13ae6096a0092p-26),
-    (double2)(0x1.d03cf60000000p+1, 0x1.db70cfb79a640p-26),
-    (double2)(0x1.40926e0000000p+3, 0x1.c2526b66dc067p-23),
-    (double2)(0x1.b4a3800000000p+4, 0x1.b81b18647f380p-23),
-    (double2)(0x1.28d0160000000p+6, 0x1.bc1cdd1e1eb08p-20),
-    (double2)(0x1.936d228000000p+7, 0x1.d9f201534fb09p-19),
-    (double2)(0x1.1228768000000p+9, 0x1.d1c064a4e9954p-18),
-    (double2)(0x1.749ea50000000p+10, 0x1.4eca65d06ea74p-18),
-    (double2)(0x1.fa71570000000p+11, 0x1.0c259bcc0ecc5p-15),
-    (double2)(0x1.5829dc8000000p+13, 0x1.b5a6647cf9016p-13),
-    (double2)(0x1.d3c4488000000p+14, 0x1.9691adefb0870p-15),
-    (double2)(0x1.3de1650000000p+16, 0x1.3410fc29cde38p-10),
-    (double2)(0x1.b00b590000000p+17, 0x1.6a31a50b6fb3cp-11),
-    (double2)(0x1.259ac48000000p+19, 0x1.7defc71805c40p-10),
-    (double2)(0x1.8f0cca8000000p+20, 0x1.eb49fd80e0babp-6),
-    (double2)(0x1.0f2ebd0000000p+22, 0x1.4fffc7bcd5920p-7),
-    (double2)(0x1.7093488000000p+23, 0x1.03a93b6c63435p-3),
-    (double2)(0x1.f4f2208000000p+24, 0x1.1940bb255fd1cp-4),
-    (double2)(0x1.546d8f8000000p+26, 0x1.ed26e14260b50p-2),
-    (double2)(0x1.ceb0888000000p+27, 0x1.b47401fc9f2a2p+0),
-    (double2)(0x1.3a6e1f8000000p+29, 0x1.67bb3f55634f1p+3),
-    (double2)(0x1.ab5adb8000000p+30, 0x1.c435ff8194ddcp+2),
-    (double2)(0x1.226af30000000p+32, 0x1.d8fee052ba63ap+5),
-    (double2)(0x1.8ab7fb0000000p+33, 0x1.51d7edccde3f6p+7),
-    (double2)(0x1.0c3d390000000p+35, 0x1.04b1644557d1ap+8),
-    (double2)(0x1.6c93268000000p+36, 0x1.6a6b5ca0a9dc4p+8),
-    (double2)(0x1.ef822f0000000p+37, 0x1.fd9cc72249abap+11),
-    (double2)(0x1.50bba30000000p+39, 0x1.e58de693edab5p+13),
-    (double2)(0x1.c9aae40000000p+40, 0x1.8c70158ac6363p+14),
-    (double2)(0x1.3704708000000p+42, 0x1.7614764f43e20p+15),
-    (double2)(0x1.a6b7658000000p+43, 0x1.6337db36fc718p+17),
-    (double2)(0x1.1f43fc8000000p+45, 0x1.12d98b1f611e2p+19),
-    (double2)(0x1.866f348000000p+46, 0x1.392bc108b37ccp+19),
-    (double2)(0x1.0953e28000000p+48, 0x1.ce87bdc3473dcp+22),
-    (double2)(0x1.689e220000000p+49, 0x1.bc8d5ae99ad14p+21),
-    (double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22),
-};
-
-DECLARE_TABLE(double2, COSH_TBL, 37) = {
-    (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
-    (double2)(0x1.8b07550000000p+0, 0x1.d9f5504c2bd28p-28),
-    (double2)(0x1.e18fa08000000p+1, 0x1.7cb66f0a4c9fdp-25),
-    (double2)(0x1.422a490000000p+3, 0x1.f58617928e588p-23),
-    (double2)(0x1.b4ee858000000p+4, 0x1.bc7d000c38d48p-25),
-    (double2)(0x1.28d6fc8000000p+6, 0x1.f7f9d4e329998p-21),
-    (double2)(0x1.936e678000000p+7, 0x1.6e6e464885269p-19),
-    (double2)(0x1.1228948000000p+9, 0x1.ba3a8b946c154p-19),
-    (double2)(0x1.749eaa8000000p+10, 0x1.3f4e76110d5a4p-18),
-    (double2)(0x1.fa71580000000p+11, 0x1.17622515a3e2bp-15),
-    (double2)(0x1.5829dd0000000p+13, 0x1.4dc4b528af3d0p-17),
-    (double2)(0x1.d3c4488000000p+14, 0x1.1156278615e10p-14),
-    (double2)(0x1.3de1650000000p+16, 0x1.35ad50ed821f5p-10),
-    (double2)(0x1.b00b590000000p+17, 0x1.6b61055f2935cp-11),
-    (double2)(0x1.259ac48000000p+19, 0x1.7e2794a601240p-10),
-    (double2)(0x1.8f0cca8000000p+20, 0x1.eb4b45f6aadd3p-6),
-    (double2)(0x1.0f2ebd0000000p+22, 0x1.5000b967b3698p-7),
-    (double2)(0x1.7093488000000p+23, 0x1.03a940fadc092p-3),
-    (double2)(0x1.f4f2208000000p+24, 0x1.1940bf3bf874cp-4),
-    (double2)(0x1.546d8f8000000p+26, 0x1.ed26e1a2a2110p-2),
-    (double2)(0x1.ceb0888000000p+27, 0x1.b4740205796d6p+0),
-    (double2)(0x1.3a6e1f8000000p+29, 0x1.67bb3f55cb85dp+3),
-    (double2)(0x1.ab5adb8000000p+30, 0x1.c435ff81e18acp+2),
-    (double2)(0x1.226af30000000p+32, 0x1.d8fee052bdea4p+5),
-    (double2)(0x1.8ab7fb0000000p+33, 0x1.51d7edccde926p+7),
-    (double2)(0x1.0c3d390000000p+35, 0x1.04b1644557e0ep+8),
-    (double2)(0x1.6c93268000000p+36, 0x1.6a6b5ca0a9e1cp+8),
-    (double2)(0x1.ef822f0000000p+37, 0x1.fd9cc72249abep+11),
-    (double2)(0x1.50bba30000000p+39, 0x1.e58de693edab5p+13),
-    (double2)(0x1.c9aae40000000p+40, 0x1.8c70158ac6364p+14),
-    (double2)(0x1.3704708000000p+42, 0x1.7614764f43e20p+15),
-    (double2)(0x1.a6b7658000000p+43, 0x1.6337db36fc718p+17),
-    (double2)(0x1.1f43fc8000000p+45, 0x1.12d98b1f611e2p+19),
-    (double2)(0x1.866f348000000p+46, 0x1.392bc108b37ccp+19),
-    (double2)(0x1.0953e28000000p+48, 0x1.ce87bdc3473dcp+22),
-    (double2)(0x1.689e220000000p+49, 0x1.bc8d5ae99ad14p+21),
-    (double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22)
-};
-
-TABLE_FUNCTION(double2, SINH_TBL, sinh_tbl);
-TABLE_FUNCTION(double2, COSH_TBL, cosh_tbl);
-
-#endif // cl_khr_fp64
--- a/libclc/generic/lib/math/tanh.cl
+++ b/libclc/generic/lib/math/tanh.cl
@ -7,133 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_tanh.h>

-_CLC_OVERLOAD _CLC_DEF float tanh(float x)
-{
-    // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
-    // to the following three formulae:
-    // 1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
-    // 2.  (1 - (2/(exp(2*x) + 1 )))
-    // 3.  (exp(2*x) - 1)/(exp(2*x) + 1)
-    // but computationally, some formulae are better on some ranges.
-
-    const float large_threshold = 0x1.0a2b24p+3f;
-
-    uint ux = as_uint(x);
-    uint aux = ux & EXSIGNBIT_SP32;
-    uint xs = ux ^ aux;
-
-    float y = as_float(aux);
-    float y2 = y*y;
-
-    float a1 = mad(y2,
-                   mad(y2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F),
-                   -0.28192806108402678e0F);
-    float b1 = mad(y2, 0.3427017942262751343e0F, 0.845784192581041099e0F);
-
-    float a2 = mad(y2,
-                   mad(y2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F),
-                   -0.24069858695196524e0F);
-    float b2 = mad(y2, 0.292529068698052819e0F, 0.72209738473684982e0F);
-
-    int c = y < 0.9f;
-    float a = c ? a1 : a2;
-    float b = c ? b1 : b2;
-    float zlo = mad(MATH_DIVIDE(a, b), y*y2, y);
-
-    float p = exp(2.0f * y) + 1.0f;
-    float zhi = 1.0F - MATH_DIVIDE(2.0F, p);
-
-    float z = y <= 1.0f ? zlo : zhi;
-    z = as_float(xs | as_uint(z));
-
-    // Edge cases
-    float sone = as_float(0x3f800000U | xs);
-    z = y > large_threshold ? sone : z;
-    z = aux < 0x39000000 | aux > 0x7f800000 ? x : z;
-
-    return z;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, tanh, float);
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double tanh(double x)
-{
-    // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
-    // to the following three formulae:
-    // 1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
-    // 2.  (1 - (2/(exp(2*x) + 1 )))
-    // 3.  (exp(2*x) - 1)/(exp(2*x) + 1)
-    // but computationally, some formulae are better on some ranges.
-
-    // The point at which e^-x is insignificant compared to e^x = ln(2^27)
-    const double large_threshold = 0x1.2b708872320e2p+4;
-
-    ulong ux = as_ulong(x);
-    ulong ax = ux & ~SIGNBIT_DP64;
-    ulong sx = ux ^ ax;
-    double y = as_double(ax);
-    double y2 = y * y;
-
-    // y < 0.9
-    double znl = fma(y2,
-                     fma(y2,
-                         fma(y2, -0.142077926378834722618091e-7, -0.200047621071909498730453e-3),
-                         -0.176016349003044679402273e-1),
-                     -0.274030424656179760118928e0);
-
-    double zdl = fma(y2,
-                     fma(y2,
-                         fma(y2, 0.2091140262529164482568557e-3, 0.201562166026937652780575e-1),
-                         0.381641414288328849317962e0),
-                     0.822091273968539282568011e0);
-
-    // 0.9 <= y <= 1
-    double znm = fma(y2,
-                     fma(y2,
-                         fma(y2, -0.115475878996143396378318e-7, -0.165597043903549960486816e-3),
-                         -0.146173047288731678404066e-1),
-                     -0.227793870659088295252442e0);
-
-    double zdm = fma(y2,
-                     fma(y2,
-                         fma(y2, 0.173076050126225961768710e-3, 0.167358775461896562588695e-1),
-                         0.317204558977294374244770e0),
-                     0.683381611977295894959554e0);
-
-    int c = y < 0.9;
-    double zn = c ? znl : znm;
-    double zd = c ? zdl : zdm;
-    double z = y + y*y2 * MATH_DIVIDE(zn, zd);
-
-    // y > 1
-    double p = exp(2.0 * y) + 1.0;
-    double zg = 1.0 - 2.0 / p;
-
-    z = y > 1.0 ? zg : z;
-
-    // Other cases
-    z = y < 0x1.0p-28 | ax > PINFBITPATT_DP64 ? x : z;
-
-    z = y > large_threshold ? 1.0 : z;
-
-    return as_double(sx | as_ulong(z));
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
-
-#endif // cl_khr_fp64
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)
-
-#endif
+#define FUNCTION tanh
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>