[libclc] Move frexp to CLC library; optimize half vecs (#127836)

This commit moves the frexp builtin to the CLC library.

It simultaneously optimizes the code generated for half vectors, which
was previously scalarizing and casting up to float. With this commit it
still casts up to float, but keeps it in the vector form.
This commit is contained in:
Fraser Cormack 2025-02-20 08:41:45 +00:00 committed by GitHub
parent 079115e6ea
commit 684ad25dfc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 183 additions and 106 deletions

View File

@ -0,0 +1,11 @@
#ifndef __CLC_MATH_CLC_FREXP_H__
#define __CLC_MATH_CLC_FREXP_H__
// Declares __clc_frexp by expanding the "unary function with an int pointer"
// declaration template through gentype.inc.
// NOTE(review): gentype.inc is not visible here; presumably it includes
// __CLC_BODY once per floating-point gentype -- confirm.
// Function name substituted into the declaration template.
#define __CLC_FUNCTION __clc_frexp
// Template body that gentype.inc is asked to expand.
#define __CLC_BODY <clc/math/unary_decl_with_int_ptr.inc>
#include <clc/math/gentype.inc>
// Drop the template parameters so they do not leak into the includer.
#undef __CLC_BODY
#undef __CLC_FUNCTION
#endif // __CLC_MATH_CLC_FREXP_H__

View File

@ -0,0 +1,6 @@
// Declaration template for a function taking a gentype value and a pointer
// to the matching-width int type, returning a gentype. One overload is
// declared per address space of the pointer: global, local and private.
// __CLC_FUNCTION, __CLC_GENTYPE and __CLC_INTN are not defined here and are
// expected to be supplied by the including file.
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
global __CLC_INTN *iptr);
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
local __CLC_INTN *iptr);
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
private __CLC_INTN *iptr);

View File

@ -0,0 +1,20 @@
#include <clc/utils.h>
// Default mapping from a builtin name to the function it forwards to:
// __CLC_FUNCTION(foo) pastes the prefix "__clc_" onto foo. The #ifndef lets
// an includer provide its own __CLC_FUNCTION(x) mapping beforehand.
#ifndef __CLC_FUNCTION
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
#endif
// Overload for an int pointer in the private address space.
// Passes both arguments unchanged to __CLC_FUNCTION(FUNCTION) and returns
// whatever that call returns.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
                                              private __CLC_INTN *iptr) {
  const __CLC_GENTYPE result = __CLC_FUNCTION(FUNCTION)(x, iptr);
  return result;
}
// Overload for an int pointer in the global address space.
// Passes both arguments unchanged to __CLC_FUNCTION(FUNCTION) and returns
// whatever that call returns.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
                                              global __CLC_INTN *iptr) {
  const __CLC_GENTYPE result = __CLC_FUNCTION(FUNCTION)(x, iptr);
  return result;
}
// Overload for an int pointer in the local address space.
// Passes both arguments unchanged to __CLC_FUNCTION(FUNCTION) and returns
// whatever that call returns.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
                                              local __CLC_INTN *iptr) {
  const __CLC_GENTYPE result = __CLC_FUNCTION(FUNCTION)(x, iptr);
  return result;
}

View File

@ -1,9 +1,7 @@
#ifndef __CLC_RELATIONAL_CLC_SELECT_H__
#define __CLC_RELATIONAL_CLC_SELECT_H__
/* Duplicate these so we don't have to distribute utils.h */
#define __CLC_CONCAT(x, y) x##y
#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
#include <clc/utils.h>
#define __CLC_SELECT_FN __clc_select
@ -13,7 +11,5 @@
#include <clc/integer/gentype.inc>
#undef __CLC_SELECT_FN
#undef __CLC_CONCAT
#undef __CLC_XCONCAT
#endif // __CLC_RELATIONAL_CLC_SELECT_H__

View File

@ -21,6 +21,7 @@ math/clc_ceil.cl
math/clc_copysign.cl
math/clc_fabs.cl
math/clc_floor.cl
math/clc_frexp.cl
math/clc_mad.cl
math/clc_modf.cl
math/clc_nextafter.cl

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <clc/clc_convert.h>
#include <clc/internal/clc.h>
#include <clc/math/math.h>
#include <clc/relational/clc_select.h>
#include <clc/utils.h>
// Expand <clc_frexp.inc> through gentype.inc three times, each time with
// __CLC_ADDRESS_SPACE set to a different address space (private, global,
// local).
// NOTE(review): __CLC_BODY is defined again before every include, presumably
// because gentype.inc #undefs it when done -- confirm.

// Pass 1: private address space.
#define __CLC_BODY <clc_frexp.inc>
#define __CLC_ADDRESS_SPACE private
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
// Pass 2: global address space.
#define __CLC_BODY <clc_frexp.inc>
#define __CLC_ADDRESS_SPACE global
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
// Pass 3: local address space.
#define __CLC_BODY <clc_frexp.inc>
#define __CLC_ADDRESS_SPACE local
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2014 Advanced Micro Devices, Inc.
* Copyright (c) 2016 Aaron Watry
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <clc/clcmacro.h>
#include <clc/utils.h>
// Names of the reinterpreting helpers for the current types: the prefix
// "__clc_as_" pasted onto the expansion of __CLC_GENTYPE / __CLC_INTN.
#define __CLC_AS_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE)
#define __CLC_AS_INTN __CLC_XCONCAT(__clc_as_, __CLC_INTN)
#if __CLC_FPSIZE == 32
/*
 * 32-bit floating-point frexp.
 *
 * Works on the bit pattern of x with integer operations: the exponent is
 * derived from the exponent field and written through ep, and the returned
 * value keeps the sign and mantissa bits of x with the exponent field replaced
 * by that of 0.5. Selections are made with __clc_select(a, b, c), assumed to
 * follow OpenCL select semantics (b where c is set, otherwise a). Under that
 * assumption a zero magnitude or an all-ones exponent field returns x
 * unchanged and stores 0 through ep.
 */
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
  __CLC_INTN bits = __CLC_AS_INTN(x);

  /* Everything except the sign bit. */
  __CLC_INTN mag = bits & 0x7fffffff;

  /* Non-zero magnitude whose exponent field is zero. */
  __CLC_INTN is_subnormal = (mag > 0) & (mag < 0x00800000);

  /* scale subnormal by 2^26 without multiplying */
  __CLC_GENTYPE scaled = __CLC_AS_GENTYPE(mag | 0x0d800000) - 0x1.0p-100f;
  mag = __clc_select(mag, __CLC_AS_INTN(scaled), is_subnormal);

  /* Take the 2^26 scaling back out of the exponent where it was applied. */
  __CLC_INTN scale_adjust =
      __clc_select((__CLC_INTN)0, (__CLC_INTN)26, is_subnormal);
  __CLC_INTN exponent = (mag >> 23) - 126 - scale_adjust;

  /* Zero magnitude, or exponent field 255 (255 - 126 == 129). */
  __CLC_INTN passthrough =
      (mag == (__CLC_INTN)0) | (exponent == (__CLC_INTN)129);

  /* Sign of x, exponent field 0x3f000000, mantissa of the (scaled) value. */
  bits = (bits & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 |
         (mag & 0x007fffff);

  *ep = __clc_select(exponent, (__CLC_INTN)0, passthrough);
  return __clc_select(__CLC_AS_GENTYPE(bits), x, passthrough);
}
#endif
#if __CLC_FPSIZE == 16
// Conversion helper names: the plain scalar spelling when __CLC_SCALAR is
// defined, otherwise the name suffixed with __CLC_VECSIZE.
#ifdef __CLC_SCALAR
#define __CLC_CONVERT_HALFN __clc_convert_half
#define __CLC_CONVERT_FLOATN __clc_convert_float
#else
#define __CLC_CONVERT_HALFN __CLC_XCONCAT(__clc_convert_half, __CLC_VECSIZE)
#define __CLC_CONVERT_FLOATN __CLC_XCONCAT(__clc_convert_float, __CLC_VECSIZE)
#endif
// 16-bit floating-point frexp: converts x with __CLC_CONVERT_FLOATN, calls
// __clc_frexp on the converted value with the same ep pointer, and converts
// the returned value back with __CLC_CONVERT_HALFN. The same code path is
// used for scalar and vector gentypes.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
return __CLC_CONVERT_HALFN(__clc_frexp(__CLC_CONVERT_FLOATN(x), ep));
}
// The conversion helper names are only needed inside this section.
#undef __CLC_CONVERT_FLOATN
#undef __CLC_CONVERT_HALFN
#endif
#if __CLC_FPSIZE == 64
// 64-bit integer helpers matching the width of the current gentype: scalar
// spellings when __CLC_SCALAR is defined, otherwise suffixed with
// __CLC_VECSIZE.
#ifdef __CLC_SCALAR
#define __CLC_AS_LONGN __clc_as_long
#define __CLC_LONGN long
#define __CLC_CONVERT_INTN __clc_convert_int
#else
#define __CLC_AS_LONGN __CLC_XCONCAT(__clc_as_long, __CLC_VECSIZE)
#define __CLC_LONGN __CLC_XCONCAT(long, __CLC_VECSIZE)
#define __CLC_CONVERT_INTN __CLC_XCONCAT(__clc_convert_int, __CLC_VECSIZE)
#endif
// 64-bit floating-point frexp.
//
// Works on the bit pattern of x with 64-bit integer operations: the exponent
// is derived from the exponent field, converted with __CLC_CONVERT_INTN and
// written through ep; the returned value keeps the sign and mantissa bits of
// x with the exponent field replaced by that of 0.5. Selections are made with
// __clc_select(a, b, c), assumed to follow OpenCL select semantics (b where c
// is set, otherwise a). Under that assumption a zero magnitude or an all-ones
// exponent field returns x unchanged and stores 0 through ep.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
  __CLC_LONGN bits = __CLC_AS_LONGN(x);

  // Everything except the sign bit.
  __CLC_LONGN mag = bits & 0x7fffffffffffffffL;

  // Non-zero magnitude whose exponent field is zero.
  __CLC_LONGN is_subnormal = (mag > 0) & (mag < 0x0010000000000000L);

  // scale subnormal by 2^54 without multiplying
  __CLC_GENTYPE scaled =
      __CLC_AS_GENTYPE(mag | 0x0370000000000000L) - 0x1.0p-968;
  mag = __clc_select(mag, __CLC_AS_LONGN(scaled), is_subnormal);

  // Take the 2^54 scaling back out of the exponent where it was applied.
  __CLC_LONGN scale_adjust =
      __clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, is_subnormal);
  __CLC_LONGN exponent = (mag >> 52) - (__CLC_LONGN)1022 - scale_adjust;

  // Zero magnitude, or exponent field 2047 (2047 - 1022 == 1025).
  __CLC_LONGN passthrough = (mag == 0) | (exponent == 1025);

  // Sign of x, exponent field 0x3fe..., mantissa of the (scaled) value.
  bits = (bits & (__CLC_LONGN)0x8000000000000000L) |
         (__CLC_LONGN)0x3fe0000000000000L |
         (mag & (__CLC_LONGN)0x000fffffffffffffL);

  *ep = __CLC_CONVERT_INTN(__clc_select(exponent, 0L, passthrough));
  return __clc_select(__CLC_AS_GENTYPE(bits), x, passthrough);
}
#undef __CLC_AS_LONGN
#undef __CLC_LONGN
#undef __CLC_CONVERT_INTN
#endif
// Remove the reinterpreting-helper names so each inclusion of this file can
// define them afresh.
#undef __CLC_AS_GENTYPE
#undef __CLC_AS_INTN

View File

@ -1,17 +1,6 @@
#include <clc/clc.h>
#include <clc/utils.h>
#include <clc/math/clc_frexp.h>
// NOTE(review): __CLC_BODY is defined twice with different values before the
// first include below, and <frexp.inc> appears next to the
// unary_def_with_int_ptr.inc template. This looks like the removed and added
// lines of a change rendered together -- confirm against the actual file.
#define __CLC_BODY <frexp.inc>
#define __CLC_ADDRESS_SPACE private
// Builtin name handed to the definition template via the FUNCTION macro.
#define FUNCTION frexp
#define __CLC_BODY <clc/math/unary_def_with_int_ptr.inc>
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
// Same expansion with __CLC_ADDRESS_SPACE set to global.
#define __CLC_BODY <frexp.inc>
#define __CLC_ADDRESS_SPACE global
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
// Same expansion with __CLC_ADDRESS_SPACE set to local.
#define __CLC_BODY <frexp.inc>
#define __CLC_ADDRESS_SPACE local
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE

View File

@ -1,87 +0,0 @@
/*
* Copyright (c) 2014 Advanced Micro Devices, Inc.
* Copyright (c) 2016 Aaron Watry
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <clc/clcmacro.h>
// Names of the reinterpreting builtins for the current types: the prefix
// "as_" pasted onto the expansion of __CLC_GENTYPE / __CLC_INTN.
#define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE)
#define __CLC_AS_INTN __CLC_XCONCAT(as_, __CLC_INTN)
#if __CLC_FPSIZE == 32
/*
 * 32-bit floating-point frexp.
 *
 * Operates on the bit pattern of x: the exponent is computed from the
 * exponent field and stored through ep; the result carries the sign and
 * mantissa bits of x under the exponent field of 0.5. With OpenCL select
 * semantics, a zero magnitude or an all-ones exponent field yields x itself
 * and stores 0 through ep.
 */
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
  __CLC_INTN raw = __CLC_AS_INTN(x);

  /* Magnitude: the bit pattern with the sign bit cleared. */
  __CLC_INTN abs_bits = raw & 0x7fffffff;

  /* Non-zero magnitude with a zero exponent field. */
  __CLC_INTN denorm = (abs_bits > 0) & (abs_bits < 0x00800000);

  /* scale subnormal by 2^26 without multiplying */
  __CLC_GENTYPE rescaled =
      __CLC_AS_GENTYPE(abs_bits | 0x0d800000) - 0x1.0p-100f;
  abs_bits = select(abs_bits, __CLC_AS_INTN(rescaled), denorm);

  /* Compensate in the exponent for the 2^26 scaling where it was applied. */
  __CLC_INTN denorm_bias = select((__CLC_INTN)0, (__CLC_INTN)26, denorm);
  __CLC_INTN exp_out = (abs_bits >> 23) - 126 - denorm_bias;

  /* Zero magnitude, or exponent field 255 (255 - 126 == 129). */
  __CLC_INTN keep_input =
      (abs_bits == (__CLC_INTN)0) | (exp_out == (__CLC_INTN)129);

  /* Sign of x, exponent field 0x3f000000, mantissa of the (scaled) value. */
  raw = (raw & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 |
        (abs_bits & 0x007fffff);

  *ep = select(exp_out, (__CLC_INTN)0, keep_input);
  return select(__CLC_AS_GENTYPE(raw), x, keep_input);
}
#endif
#if __CLC_FPSIZE == 16
// The definitions in this section are emitted only while __CLC_SCALAR is
// defined.
#ifdef __CLC_SCALAR
// Scalar 16-bit frexp: casts x to float, calls the float frexp overload with
// the same ep pointer, and casts the returned value back to the gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x,
__CLC_ADDRESS_SPACE __CLC_INTN *ep) {
return (__CLC_GENTYPE)frexp((float)x, ep);
}
// NOTE(review): _CLC_V_V_VP_VECTORIZE is defined outside this view;
// presumably it generates the vector overloads by applying the scalar
// overload element by element -- confirm against clcmacro.h.
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp,
__CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN);
#endif
#endif
#if __CLC_FPSIZE == 64
// 64-bit integer helpers matching the width of the current gentype: scalar
// spellings when __CLC_SCALAR is defined, otherwise suffixed with
// __CLC_VECSIZE.
#ifdef __CLC_SCALAR
#define __CLC_AS_LONGN as_long
#define __CLC_LONGN long
#define __CLC_CONVERT_INTN convert_int
#else
#define __CLC_AS_LONGN __CLC_XCONCAT(as_long, __CLC_VECSIZE)
#define __CLC_LONGN __CLC_XCONCAT(long, __CLC_VECSIZE)
#define __CLC_CONVERT_INTN __CLC_XCONCAT(convert_int, __CLC_VECSIZE)
#endif
// 64-bit floating-point frexp.
//
// Operates on the bit pattern of x with 64-bit integers: the exponent is
// computed from the exponent field, narrowed with __CLC_CONVERT_INTN and
// stored through ep; the result carries the sign and mantissa bits of x under
// the exponent field of 0.5. With OpenCL select semantics, a zero magnitude
// or an all-ones exponent field yields x itself and stores 0 through ep.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
  __CLC_LONGN raw = __CLC_AS_LONGN(x);

  // Magnitude: the bit pattern with the sign bit cleared.
  __CLC_LONGN abs_bits = raw & 0x7fffffffffffffffL;

  // Non-zero magnitude with a zero exponent field.
  __CLC_LONGN denorm = (abs_bits > 0) & (abs_bits < 0x0010000000000000L);

  // scale subnormal by 2^54 without multiplying
  __CLC_GENTYPE rescaled =
      __CLC_AS_GENTYPE(abs_bits | 0x0370000000000000L) - 0x1.0p-968;
  abs_bits = select(abs_bits, __CLC_AS_LONGN(rescaled), denorm);

  // Compensate in the exponent for the 2^54 scaling where it was applied.
  __CLC_LONGN denorm_bias = select((__CLC_LONGN)0, (__CLC_LONGN)54, denorm);
  __CLC_LONGN exp_out = (abs_bits >> 52) - (__CLC_LONGN)1022 - denorm_bias;

  // Zero magnitude, or exponent field 2047 (2047 - 1022 == 1025).
  __CLC_LONGN keep_input = (abs_bits == 0) | (exp_out == 1025);

  // Sign of x, exponent field 0x3fe..., mantissa of the (scaled) value.
  raw = (raw & (__CLC_LONGN)0x8000000000000000L) |
        (__CLC_LONGN)0x3fe0000000000000L |
        (abs_bits & (__CLC_LONGN)0x000fffffffffffffL);

  *ep = __CLC_CONVERT_INTN(select(exp_out, 0L, keep_input));
  return select(__CLC_AS_GENTYPE(raw), x, keep_input);
}
#undef __CLC_AS_LONGN
#undef __CLC_LONGN
#undef __CLC_CONVERT_INTN
#endif
// Remove the reinterpreting-builtin names so each inclusion of this file can
// define them afresh.
#undef __CLC_AS_GENTYPE
#undef __CLC_AS_INTN