[libclc] Move rotate to CLC library; optimize (#125713)

This commit moves the rotate builtin to the CLC library.

It also optimizes rotate(x, n) to generate the @llvm.fshl(x, x, n)
intrinsic, for both scalar and vector types. The previous implementation
was too cautious in its handling of the shift amount; the OpenCL rules
state that the shift amount is always treated as an unsigned value
modulo the bitwidth.
This commit is contained in:
Fraser Cormack 2025-02-05 10:38:23 +00:00 committed by GitHub
parent 6c84d64ffc
commit 76d1cb22c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 46 additions and 43 deletions

View File

@ -0,0 +1,12 @@
#ifndef __CLC_INTEGER_CLC_ROTATE_H__
#define __CLC_INTEGER_CLC_ROTATE_H__
// Name the function and the per-type body, then hand both to gentype.inc.
// NOTE(review): presumably this yields one __clc_rotate declaration per
// integer gentype -- confirm against binary_decl.inc and gentype.inc.
#define __CLC_FUNCTION __clc_rotate
#define __CLC_BODY <clc/shared/binary_decl.inc>
#include <clc/integer/gentype.inc>
// Undefine the helper macros so they are not left defined for code that
// includes this header.
#undef __CLC_BODY
#undef __CLC_FUNCTION
#endif // __CLC_INTEGER_CLC_ROTATE_H__

View File

@ -7,6 +7,7 @@
../generic/integer/clc_mul_hi.cl
../generic/integer/clc_popcount.cl
../generic/integer/clc_rhadd.cl
../generic/integer/clc_rotate.cl
../generic/integer/clc_sub_sat.cl
../generic/integer/clc_upsample.cl
../generic/math/clc_ceil.cl

View File

@ -13,6 +13,7 @@ integer/clc_mul24.cl
integer/clc_mul_hi.cl
integer/clc_popcount.cl
integer/clc_rhadd.cl
integer/clc_rotate.cl
integer/clc_sub_sat.cl
integer/clc_upsample.cl
math/clc_ceil.cl

View File

@ -0,0 +1,5 @@
// Translation unit for the internal __clc_rotate implementation.
#include <clc/internal/clc.h>
#include <clc/utils.h>
// The function body lives in clc_rotate.inc and is pulled in via gentype.inc.
// NOTE(review): presumably gentype.inc includes __CLC_BODY once per integer
// scalar/vector type -- confirm.
#define __CLC_BODY <clc_rotate.inc>
#include <clc/integer/gentype.inc>

View File

@ -0,0 +1,22 @@
// Helpers that pass a value through the __clc_as_<type> function matching the
// current gentype, or its unsigned counterpart.
#define __CLC_AS_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE)(x)
#define __CLC_AS_U_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_U_GENTYPE)(x)

// __clc_rotate(x, n): rotate the bits of x left by n positions.
//
// The rotate amount follows the usual OpenCL shift modulo rules: n is viewed
// as an unsigned value and only the bits selected by (__CLC_GENSIZE - 1) are
// used. Signed or out-of-range amounts therefore need no special casing, and
// the whole computation is carried out in the unsigned type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rotate(__CLC_GENTYPE x,
                                                  __CLC_GENTYPE n) {
  const __CLC_U_GENTYPE amount_mask = (__CLC_U_GENTYPE)(__CLC_GENSIZE - 1);
  const __CLC_U_GENTYPE bits = __CLC_AS_U_GENTYPE(x);

  // Left-shift amount: the masked, unsigned view of n.
  const __CLC_U_GENTYPE left = __CLC_AS_U_GENTYPE(n) & amount_mask;

  // Complementary right-shift amount. The mask maps the left == 0 case to a
  // right shift of 0 instead of a shift by the full __CLC_GENSIZE.
  const __CLC_U_GENTYPE right =
      ((__CLC_U_GENTYPE)__CLC_GENSIZE - left) & amount_mask;

  // Store into the unsigned gentype before converting back, so the value
  // handed to __CLC_AS_GENTYPE has exactly that type.
  const __CLC_U_GENTYPE rotated = (bits << left) | (bits >> right);

  return __CLC_AS_GENTYPE(rotated);
}

View File

@ -11,6 +11,7 @@
../generic/integer/clc_mul_hi.cl
../generic/integer/clc_popcount.cl
../generic/integer/clc_rhadd.cl
../generic/integer/clc_rotate.cl
../generic/integer/clc_sub_sat.cl
../generic/integer/clc_upsample.cl
../generic/math/clc_ceil.cl

View File

@ -1,4 +1,7 @@
#include <clc/clc.h>
#include <clc/integer/clc_rotate.h>

// Public OpenCL rotate builtin.
// NOTE(review): presumably binary_def.inc uses FUNCTION to define rotate(x, n)
// in terms of __clc_rotate(x, n) for every gentype -- confirm against
// binary_def.inc.
#define FUNCTION rotate
// __CLC_BODY is defined exactly once; defining it a second time with a
// different value, without an #undef in between, is an invalid macro
// redefinition.
#define __CLC_BODY <clc/shared/binary_def.inc>

#include <clc/integer/gentype.inc>

View File

@ -1,42 +0,0 @@
/**
* Generic rotate(x, n) built from plain shifts and ORs.
*
* n is first reduced with the % operator against __CLC_GENSIZE. The code then
* rotates left by n when n is positive and right by -n when n is negative;
* n == 0 leaves x unchanged.
*
* Not necessarily optimal... but it produces correct results (at least for int)
* If we're lucky, LLVM will recognize the pattern and produce rotate
* instructions:
* http://llvm.1065342.n5.nabble.com/rotate-td47679.html
*
* Eventually, someone should feel free to implement an llvm-specific version
*/
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE n){
//Try to avoid extra work if someone's spinning the value through multiple
//full rotations
n = n % (__CLC_GENTYPE)__CLC_GENSIZE;
#ifdef __CLC_SCALAR
//Scalar path: pick the rotate direction from the sign of the reduced n.
if (n > 0){
//Rotate left: the bits shifted out on the left re-enter from the right.
//x is cast to unsigned for the right shift.
return (x << n) | (((__CLC_U_GENTYPE)x) >> (__CLC_GENSIZE - n));
} else if (n == 0){
return x;
} else {
//Negative n: rotate right by -n (equivalently, left by __CLC_GENSIZE + n).
return ( (((__CLC_U_GENTYPE)x) >> -n) | (x << (__CLC_GENSIZE + n)) );
}
#else
//Vector path: no branching; a right rotate and a left rotate are applied in
//sequence, and the amount of whichever one does not apply is forced to 0.
//XXX: There's a lot of __builtin_astype calls to cast everything to
// unsigned ... This should be improved so that if __CLC_GENTYPE==__CLC_U_GENTYPE, no
// casts are required.
__CLC_U_GENTYPE x_1 = __builtin_astype(x, __CLC_U_GENTYPE);
//XXX: Is (__CLC_U_GENTYPE >> S__CLC_GENTYPE) | (__CLC_U_GENTYPE << S__CLC_GENTYPE) legal?
// If so, then combine the amt and shifts into a single set of statements
__CLC_U_GENTYPE amt;
//Step 1: rotate right by -n where n is negative, otherwise by 0.
//NOTE(review): when amt is 0 the complementary shift is by __CLC_GENSIZE;
//this presumably relies on OpenCL reducing shift amounts modulo the bit
//width -- confirm.
amt = (n < (__CLC_GENTYPE)0 ? __builtin_astype((__CLC_GENTYPE)0-n, __CLC_U_GENTYPE) : (__CLC_U_GENTYPE)0);
x_1 = (x_1 >> amt) | (x_1 << ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt));
//Step 2: rotate left by n where n is non-negative, otherwise by 0.
amt = (n < (__CLC_GENTYPE)0 ? (__CLC_U_GENTYPE)0 : __builtin_astype(n, __CLC_U_GENTYPE));
x_1 = (x_1 << amt) | (x_1 >> ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt));
return __builtin_astype(x_1, __CLC_GENTYPE);
#endif
}