mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-14 17:06:38 +00:00
[libclc] Move shuffle/shuffle2 to the CLC library (#135000)
This commit moves the shuffle and shuffle2 builtins to the CLC library. In so doing it makes the headers simpler and re-usable for other builtin layers to hook into the CLC functions, if they wish. An additional gentype utility has been made available, which provides a consistent vector-size-or-1 macro for use. The existing __CLC_VECSIZE is defined but empty which is useful in certain applications, such as in concatenation with a type to make a correctly sized scalar or vector type. However, this isn't usable in the same preprocessor lines when wanting to check for specific vector sizes, as e.g., '__CLC_VECSIZE == 2' resolves to '== 2' which is invalid. In local testing this is also useful for the geometric builtins which are only available for scalar types and vector types of 2, 3, or 4 elements. No codegen changes are observed, except the internal shuffle/shuffle2 utility functions are no longer made publicly available.
This commit is contained in:
parent
5083e80c14
commit
b0338c3d6c
@ -27,15 +27,19 @@
|
||||
#define __CLC_GENTYPE char
|
||||
#define __CLC_U_GENTYPE uchar
|
||||
#define __CLC_S_GENTYPE char
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE char2
|
||||
#define __CLC_U_GENTYPE uchar2
|
||||
#define __CLC_S_GENTYPE char2
|
||||
@ -85,6 +89,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
#define __CLC_SCALAR_GENTYPE uchar
|
||||
@ -92,15 +97,19 @@
|
||||
#define __CLC_GENTYPE uchar
|
||||
#define __CLC_U_GENTYPE uchar
|
||||
#define __CLC_S_GENTYPE char
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE uchar2
|
||||
#define __CLC_U_GENTYPE uchar2
|
||||
#define __CLC_S_GENTYPE char2
|
||||
@ -150,6 +159,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_GENSIZE
|
||||
#define __CLC_GENSIZE 16
|
||||
@ -159,15 +169,19 @@
|
||||
#define __CLC_GENTYPE short
|
||||
#define __CLC_U_GENTYPE ushort
|
||||
#define __CLC_S_GENTYPE short
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE short2
|
||||
#define __CLC_U_GENTYPE ushort2
|
||||
#define __CLC_S_GENTYPE short2
|
||||
@ -217,6 +231,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
#define __CLC_SCALAR_GENTYPE ushort
|
||||
@ -224,15 +239,19 @@
|
||||
#define __CLC_GENTYPE ushort
|
||||
#define __CLC_U_GENTYPE ushort
|
||||
#define __CLC_S_GENTYPE short
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE ushort2
|
||||
#define __CLC_U_GENTYPE ushort2
|
||||
#define __CLC_S_GENTYPE short2
|
||||
@ -282,6 +301,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_GENSIZE
|
||||
#define __CLC_GENSIZE 32
|
||||
@ -291,15 +311,19 @@
|
||||
#define __CLC_GENTYPE int
|
||||
#define __CLC_U_GENTYPE uint
|
||||
#define __CLC_S_GENTYPE int
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE int2
|
||||
#define __CLC_U_GENTYPE uint2
|
||||
#define __CLC_S_GENTYPE int2
|
||||
@ -349,6 +373,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
#define __CLC_SCALAR_GENTYPE uint
|
||||
@ -356,15 +381,19 @@
|
||||
#define __CLC_GENTYPE uint
|
||||
#define __CLC_U_GENTYPE uint
|
||||
#define __CLC_S_GENTYPE int
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE uint2
|
||||
#define __CLC_U_GENTYPE uint2
|
||||
#define __CLC_S_GENTYPE int2
|
||||
@ -414,6 +443,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_GENSIZE
|
||||
#define __CLC_GENSIZE 64
|
||||
@ -423,15 +453,19 @@
|
||||
#define __CLC_GENTYPE long
|
||||
#define __CLC_U_GENTYPE ulong
|
||||
#define __CLC_S_GENTYPE long
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE long2
|
||||
#define __CLC_U_GENTYPE ulong2
|
||||
#define __CLC_S_GENTYPE long2
|
||||
@ -481,6 +515,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
#define __CLC_SCALAR_GENTYPE ulong
|
||||
@ -488,15 +523,20 @@
|
||||
#define __CLC_GENTYPE ulong
|
||||
#define __CLC_U_GENTYPE ulong
|
||||
#define __CLC_S_GENTYPE long
|
||||
#define __CLC_SCALAR 1
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE ulong2
|
||||
#define __CLC_U_GENTYPE ulong2
|
||||
#define __CLC_S_GENTYPE long2
|
||||
@ -546,6 +586,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
|
||||
#undef __CLC_GENSIZE
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
|
@ -81,12 +81,16 @@
|
||||
#define __CLC_BIT_INTN int
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_BIT_INTN
|
||||
#undef __CLC_SCALAR
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE float2
|
||||
#define __CLC_BIT_INTN int2
|
||||
#define __CLC_VECSIZE 2
|
||||
@ -127,6 +131,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_BIT_INTN
|
||||
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_GENTYPE_NAN
|
||||
@ -150,14 +155,18 @@
|
||||
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#define __CLC_GENTYPE double
|
||||
#define __CLC_BIT_INTN long
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_BIT_INTN
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE double2
|
||||
#define __CLC_BIT_INTN long2
|
||||
#define __CLC_VECSIZE 2
|
||||
@ -198,6 +207,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_BIT_INTN
|
||||
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_GENTYPE_NAN
|
||||
@ -221,14 +231,18 @@
|
||||
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_VECSIZE
|
||||
#define __CLC_VECSIZE_OR_1 1
|
||||
#define __CLC_GENTYPE half
|
||||
#define __CLC_BIT_INTN short
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_BIT_INTN
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_SCALAR
|
||||
|
||||
#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
|
||||
|
||||
#define __CLC_GENTYPE half2
|
||||
#define __CLC_BIT_INTN short2
|
||||
#define __CLC_VECSIZE 2
|
||||
@ -269,6 +283,7 @@
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_BIT_INTN
|
||||
|
||||
#undef __CLC_VECSIZE_OR_1
|
||||
#undef __CLC_U_GENTYPE
|
||||
#undef __CLC_S_GENTYPE
|
||||
#undef __CLC_GENTYPE_NAN
|
||||
|
24
libclc/clc/include/clc/misc/clc_shuffle.h
Normal file
24
libclc/clc/include/clc/misc/clc_shuffle.h
Normal file
@ -0,0 +1,24 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MISC_CLC_SHUFFLE_H__
|
||||
#define __CLC_MISC_CLC_SHUFFLE_H__
|
||||
|
||||
#define __CLC_FUNCTION __clc_shuffle
|
||||
|
||||
// Integer-type decls
|
||||
#define __CLC_BODY <clc/misc/shuffle_decl.inc>
|
||||
#include <clc/integer/gentype.inc>
|
||||
|
||||
// Floating-point decls
|
||||
#define __CLC_BODY <clc/misc/shuffle_decl.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MISC_CLC_SHUFFLE_H__
|
24
libclc/clc/include/clc/misc/clc_shuffle2.h
Normal file
24
libclc/clc/include/clc/misc/clc_shuffle2.h
Normal file
@ -0,0 +1,24 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_MISC_CLC_SHUFFLE2_H__
|
||||
#define __CLC_MISC_CLC_SHUFFLE2_H__
|
||||
|
||||
#define __CLC_FUNCTION __clc_shuffle2
|
||||
|
||||
// Integer-type decls
|
||||
#define __CLC_BODY <clc/misc/shuffle2_decl.inc>
|
||||
#include <clc/integer/gentype.inc>
|
||||
|
||||
// Floating-point decls
|
||||
#define __CLC_BODY <clc/misc/shuffle2_decl.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MISC_CLC_SHUFFLE2_H__
|
28
libclc/clc/include/clc/misc/shuffle2_decl.inc
Normal file
28
libclc/clc/include/clc/misc/shuffle2_decl.inc
Normal file
@ -0,0 +1,28 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \
|
||||
__CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
|
||||
|
||||
// The return type is same base type as the input type, with the same vector
|
||||
// size as the mask. Elements in the mask must be the same size (number of bits)
|
||||
// as the input value, e.g. char8 r = shuffle2(char2 x, char2 y, uchar8 mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask);
|
||||
|
||||
#endif
|
42
libclc/clc/include/clc/misc/shuffle2_def.inc
Normal file
42
libclc/clc/include/clc/misc/shuffle2_def.inc
Normal file
@ -0,0 +1,42 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \
|
||||
__CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
|
||||
|
||||
#include <clc/utils.h>
|
||||
|
||||
#ifndef __CLC_FUNCTION
|
||||
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
|
||||
#endif
|
||||
|
||||
// The return type is same base type as the input type, with the same vector
|
||||
// size as the mask. Elements in the mask must be the same size (number of bits)
|
||||
// as the input value, e.g. char8 r = shuffle2(char2 x, char2 y, uchar8 mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, y, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, y, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, y, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, y, mask);
|
||||
}
|
||||
|
||||
#endif
|
24
libclc/clc/include/clc/misc/shuffle_decl.inc
Normal file
24
libclc/clc/include/clc/misc/shuffle_decl.inc
Normal file
@ -0,0 +1,24 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \
|
||||
__CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
|
||||
|
||||
// The return type is same base type as the input type, with the same vector
|
||||
// size as the mask. Elements in the mask must be the same size (number of bits)
|
||||
// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask);
|
||||
|
||||
#endif
|
38
libclc/clc/include/clc/misc/shuffle_def.inc
Normal file
38
libclc/clc/include/clc/misc/shuffle_def.inc
Normal file
@ -0,0 +1,38 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \
|
||||
__CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
|
||||
|
||||
#include <clc/utils.h>
|
||||
|
||||
#ifndef __CLC_FUNCTION
|
||||
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
|
||||
#endif
|
||||
|
||||
// The return type is same base type as the input type, with the same vector
|
||||
// size as the mask. Elements in the mask must be the same size (number of bits)
|
||||
// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_FUNCTION(FUNCTION)(x, mask);
|
||||
}
|
||||
|
||||
#endif
|
@ -89,6 +89,8 @@ math/clc_tanh.cl
|
||||
math/clc_tanpi.cl
|
||||
math/clc_tgamma.cl
|
||||
math/clc_trunc.cl
|
||||
misc/clc_shuffle.cl
|
||||
misc/clc_shuffle2.cl
|
||||
relational/clc_all.cl
|
||||
relational/clc_any.cl
|
||||
relational/clc_bitselect.cl
|
||||
|
173
libclc/clc/lib/generic/misc/clc_shuffle.cl
Normal file
173
libclc/clc/lib/generic/misc/clc_shuffle.cl
Normal file
@ -0,0 +1,173 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
#define _CLC_ELEMENT_CASES2(VAR) \
|
||||
case 0: \
|
||||
return VAR.s0; \
|
||||
case 1: \
|
||||
return VAR.s1;
|
||||
|
||||
#define _CLC_ELEMENT_CASES4(VAR) \
|
||||
_CLC_ELEMENT_CASES2(VAR) \
|
||||
case 2: \
|
||||
return VAR.s2; \
|
||||
case 3: \
|
||||
return VAR.s3;
|
||||
|
||||
#define _CLC_ELEMENT_CASES8(VAR) \
|
||||
_CLC_ELEMENT_CASES4(VAR) \
|
||||
case 4: \
|
||||
return VAR.s4; \
|
||||
case 5: \
|
||||
return VAR.s5; \
|
||||
case 6: \
|
||||
return VAR.s6; \
|
||||
case 7: \
|
||||
return VAR.s7;
|
||||
|
||||
#define _CLC_ELEMENT_CASES16(VAR) \
|
||||
_CLC_ELEMENT_CASES8(VAR) \
|
||||
case 8: \
|
||||
return VAR.s8; \
|
||||
case 9: \
|
||||
return VAR.s9; \
|
||||
case 10: \
|
||||
return VAR.sA; \
|
||||
case 11: \
|
||||
return VAR.sB; \
|
||||
case 12: \
|
||||
return VAR.sC; \
|
||||
case 13: \
|
||||
return VAR.sD; \
|
||||
case 14: \
|
||||
return VAR.sE; \
|
||||
case 15: \
|
||||
return VAR.sF;
|
||||
|
||||
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
|
||||
inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \
|
||||
ARGTYPE##ARGSIZE x, IDXTYPE idx) { \
|
||||
switch (idx) { _CLC_ELEMENT_CASES##ARGSIZE(x) default : return 0; } \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
|
||||
ret_val.s##INDEX = \
|
||||
__clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \
|
||||
ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \
|
||||
ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \
|
||||
ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \
|
||||
ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \
|
||||
ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
|
||||
ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
|
||||
ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
|
||||
ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
|
||||
ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
|
||||
ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
|
||||
ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
|
||||
ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF);
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 __clc_shuffle(ARGTYPE##ARGSIZE x, \
|
||||
MASKTYPE##2 mask) { \
|
||||
ARGTYPE##2 ret_val; \
|
||||
mask &= (MASKTYPE##2)(ARGSIZE - 1); \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 __clc_shuffle(ARGTYPE##ARGSIZE x, \
|
||||
MASKTYPE##4 mask) { \
|
||||
ARGTYPE##4 ret_val; \
|
||||
mask &= (MASKTYPE##4)(ARGSIZE - 1); \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 __clc_shuffle(ARGTYPE##ARGSIZE x, \
|
||||
MASKTYPE##8 mask) { \
|
||||
ARGTYPE##8 ret_val; \
|
||||
mask &= (MASKTYPE##8)(ARGSIZE - 1); \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 __clc_shuffle(ARGTYPE##ARGSIZE x, \
|
||||
MASKTYPE##16 mask) { \
|
||||
ARGTYPE##16 ret_val; \
|
||||
mask &= (MASKTYPE##16)(ARGSIZE - 1); \
|
||||
_CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
|
||||
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
#undef _CLC_ELEMENT_CASES2
|
||||
#undef _CLC_ELEMENT_CASES4
|
||||
#undef _CLC_ELEMENT_CASES8
|
||||
#undef _CLC_ELEMENT_CASES16
|
||||
#undef _CLC_GET_ELEMENT_DEFINE
|
||||
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
|
||||
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_DEFINE2
|
||||
#undef _CLC_SHUFFLE_DEFINE4
|
||||
#undef _CLC_SHUFFLE_DEFINE8
|
||||
#undef _CLC_SHUFFLE_DEFINE16
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
174
libclc/clc/lib/generic/misc/clc_shuffle2.cl
Normal file
174
libclc/clc/lib/generic/misc/clc_shuffle2.cl
Normal file
@ -0,0 +1,174 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
#define _CLC_ELEMENT_CASES2(VAR) \
|
||||
case 0: \
|
||||
return VAR.s0; \
|
||||
case 1: \
|
||||
return VAR.s1;
|
||||
|
||||
#define _CLC_ELEMENT_CASES4(VAR) \
|
||||
_CLC_ELEMENT_CASES2(VAR) \
|
||||
case 2: \
|
||||
return VAR.s2; \
|
||||
case 3: \
|
||||
return VAR.s3;
|
||||
|
||||
#define _CLC_ELEMENT_CASES8(VAR) \
|
||||
_CLC_ELEMENT_CASES4(VAR) \
|
||||
case 4: \
|
||||
return VAR.s4; \
|
||||
case 5: \
|
||||
return VAR.s5; \
|
||||
case 6: \
|
||||
return VAR.s6; \
|
||||
case 7: \
|
||||
return VAR.s7;
|
||||
|
||||
#define _CLC_ELEMENT_CASES16(VAR) \
|
||||
_CLC_ELEMENT_CASES8(VAR) \
|
||||
case 8: \
|
||||
return VAR.s8; \
|
||||
case 9: \
|
||||
return VAR.s9; \
|
||||
case 10: \
|
||||
return VAR.sA; \
|
||||
case 11: \
|
||||
return VAR.sB; \
|
||||
case 12: \
|
||||
return VAR.sC; \
|
||||
case 13: \
|
||||
return VAR.sD; \
|
||||
case 14: \
|
||||
return VAR.sE; \
|
||||
case 15: \
|
||||
return VAR.sF;
|
||||
|
||||
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
|
||||
__attribute__((always_inline)) ARGTYPE \
|
||||
__clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \
|
||||
ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) { \
|
||||
if (idx < ARGSIZE) \
|
||||
switch (idx) { _CLC_ELEMENT_CASES##ARGSIZE(x) default : return 0; } \
|
||||
else \
|
||||
switch (idx - ARGSIZE) { \
|
||||
_CLC_ELEMENT_CASES##ARGSIZE(y) default : return 0; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \
|
||||
ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \
|
||||
ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \
|
||||
ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \
|
||||
ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \
|
||||
ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \
|
||||
ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \
|
||||
ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \
|
||||
ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \
|
||||
ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \
|
||||
ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \
|
||||
ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \
|
||||
ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF);
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 __clc_shuffle2( \
|
||||
ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask) { \
|
||||
ARGTYPE##2 ret_val; \
|
||||
mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 __clc_shuffle2( \
|
||||
ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask) { \
|
||||
ARGTYPE##4 ret_val; \
|
||||
mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 __clc_shuffle2( \
|
||||
ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask) { \
|
||||
ARGTYPE##8 ret_val; \
|
||||
mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 __clc_shuffle2( \
|
||||
ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask) { \
|
||||
ARGTYPE##16 ret_val; \
|
||||
mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
|
||||
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
#undef _CLC_ELEMENT_CASES2
|
||||
#undef _CLC_ELEMENT_CASES4
|
||||
#undef _CLC_ELEMENT_CASES8
|
||||
#undef _CLC_ELEMENT_CASES16
|
||||
#undef _CLC_GET_ELEMENT_DEFINE
|
||||
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_DEFINE2
|
||||
#undef _CLC_SHUFFLE_DEFINE4
|
||||
#undef _CLC_SHUFFLE_DEFINE8
|
||||
#undef _CLC_SHUFFLE_DEFINE16
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
@ -6,41 +6,14 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define _CLC_SHUFFLE_DECL(TYPE, MASKTYPE, RETTYPE) \
|
||||
_CLC_OVERLOAD _CLC_DECL RETTYPE shuffle(TYPE x, MASKTYPE mask);
|
||||
#define __CLC_FUNCTION shuffle
|
||||
|
||||
//Return type is same base type as the input type, with the same vector size as the mask.
|
||||
//Elements in the mask must be the same size (number of bits) as the input value.
|
||||
//E.g. char8 ret = shuffle(char2 x, uchar8 mask);
|
||||
// Integer-type decls
|
||||
#define __CLC_BODY <clc/misc/shuffle_decl.inc>
|
||||
#include <clc/integer/gentype.inc>
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
|
||||
_CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
|
||||
_CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
|
||||
_CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
|
||||
// Floating-point decls
|
||||
#define __CLC_BODY <clc/misc/shuffle_decl.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
|
||||
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
#undef _CLC_SHUFFLE_DECL
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
||||
#undef __CLC_FUNCTION
|
||||
|
@ -7,150 +7,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/misc/clc_shuffle.h>
|
||||
|
||||
#define _CLC_ELEMENT_CASES2(VAR) \
|
||||
case 0: return VAR.s0; \
|
||||
case 1: return VAR.s1;
|
||||
#define FUNCTION shuffle
|
||||
|
||||
#define _CLC_ELEMENT_CASES4(VAR) \
|
||||
_CLC_ELEMENT_CASES2(VAR) \
|
||||
case 2: return VAR.s2; \
|
||||
case 3: return VAR.s3;
|
||||
#define __CLC_BODY <clc/misc/shuffle_def.inc>
|
||||
#include <clc/integer/gentype.inc>
|
||||
|
||||
#define _CLC_ELEMENT_CASES8(VAR) \
|
||||
_CLC_ELEMENT_CASES4(VAR) \
|
||||
case 4: return VAR.s4; \
|
||||
case 5: return VAR.s5; \
|
||||
case 6: return VAR.s6; \
|
||||
case 7: return VAR.s7;
|
||||
|
||||
#define _CLC_ELEMENT_CASES16(VAR) \
|
||||
_CLC_ELEMENT_CASES8(VAR) \
|
||||
case 8: return VAR.s8; \
|
||||
case 9: return VAR.s9; \
|
||||
case 10: return VAR.sA; \
|
||||
case 11: return VAR.sB; \
|
||||
case 12: return VAR.sC; \
|
||||
case 13: return VAR.sD; \
|
||||
case 14: return VAR.sE; \
|
||||
case 15: return VAR.sF;
|
||||
|
||||
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
|
||||
inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, IDXTYPE idx) {\
|
||||
switch (idx){ \
|
||||
_CLC_ELEMENT_CASES##ARGSIZE(x) \
|
||||
default: return 0; \
|
||||
} \
|
||||
} \
|
||||
|
||||
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
|
||||
ret_val.s##INDEX = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX); \
|
||||
|
||||
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \
|
||||
ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \
|
||||
ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \
|
||||
ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \
|
||||
ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \
|
||||
ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
|
||||
ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
|
||||
ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
|
||||
ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
|
||||
ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
|
||||
ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
|
||||
ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
|
||||
ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF); \
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##2 mask){ \
|
||||
ARGTYPE##2 ret_val; \
|
||||
mask &= (MASKTYPE##2)(ARGSIZE-1); \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##4 mask){ \
|
||||
ARGTYPE##4 ret_val; \
|
||||
mask &= (MASKTYPE##4)(ARGSIZE-1); \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##8 mask){ \
|
||||
ARGTYPE##8 ret_val; \
|
||||
mask &= (MASKTYPE##8)(ARGSIZE-1); \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##16 mask){ \
|
||||
ARGTYPE##16 ret_val; \
|
||||
mask &= (MASKTYPE##16)(ARGSIZE-1); \
|
||||
_CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
|
||||
|
||||
|
||||
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
#undef _CLC_ELEMENT_CASES2
|
||||
#undef _CLC_ELEMENT_CASES4
|
||||
#undef _CLC_ELEMENT_CASES8
|
||||
#undef _CLC_ELEMENT_CASES16
|
||||
#undef _CLC_GET_ELEMENT_DEFINE
|
||||
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
|
||||
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_DEFINE2
|
||||
#undef _CLC_SHUFFLE_DEFINE4
|
||||
#undef _CLC_SHUFFLE_DEFINE8
|
||||
#undef _CLC_SHUFFLE_DEFINE16
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
||||
#define __CLC_BODY <clc/misc/shuffle_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -7,153 +7,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/misc/clc_shuffle2.h>
|
||||
|
||||
#define _CLC_ELEMENT_CASES2(VAR) \
|
||||
case 0: return VAR.s0; \
|
||||
case 1: return VAR.s1;
|
||||
#define FUNCTION shuffle2
|
||||
|
||||
#define _CLC_ELEMENT_CASES4(VAR) \
|
||||
_CLC_ELEMENT_CASES2(VAR) \
|
||||
case 2: return VAR.s2; \
|
||||
case 3: return VAR.s3;
|
||||
#define __CLC_BODY <clc/misc/shuffle2_def.inc>
|
||||
#include <clc/integer/gentype.inc>
|
||||
|
||||
#define _CLC_ELEMENT_CASES8(VAR) \
|
||||
_CLC_ELEMENT_CASES4(VAR) \
|
||||
case 4: return VAR.s4; \
|
||||
case 5: return VAR.s5; \
|
||||
case 6: return VAR.s6; \
|
||||
case 7: return VAR.s7;
|
||||
|
||||
#define _CLC_ELEMENT_CASES16(VAR) \
|
||||
_CLC_ELEMENT_CASES8(VAR) \
|
||||
case 8: return VAR.s8; \
|
||||
case 9: return VAR.s9; \
|
||||
case 10: return VAR.sA; \
|
||||
case 11: return VAR.sB; \
|
||||
case 12: return VAR.sC; \
|
||||
case 13: return VAR.sD; \
|
||||
case 14: return VAR.sE; \
|
||||
case 15: return VAR.sF;
|
||||
|
||||
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
|
||||
__attribute__((always_inline)) \
|
||||
ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) {\
|
||||
if (idx < ARGSIZE) \
|
||||
switch (idx){ \
|
||||
_CLC_ELEMENT_CASES##ARGSIZE(x) \
|
||||
default: return 0; \
|
||||
} \
|
||||
else \
|
||||
switch (idx - ARGSIZE){ \
|
||||
_CLC_ELEMENT_CASES##ARGSIZE(y) \
|
||||
default: return 0; \
|
||||
} \
|
||||
} \
|
||||
|
||||
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \
|
||||
ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \
|
||||
ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \
|
||||
ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \
|
||||
ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \
|
||||
ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
|
||||
|
||||
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \
|
||||
ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \
|
||||
ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \
|
||||
ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \
|
||||
ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \
|
||||
ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \
|
||||
ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \
|
||||
ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF); \
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask){ \
|
||||
ARGTYPE##2 ret_val; \
|
||||
mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask){ \
|
||||
ARGTYPE##4 ret_val; \
|
||||
mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask){ \
|
||||
ARGTYPE##8 ret_val; \
|
||||
mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask){ \
|
||||
ARGTYPE##16 ret_val; \
|
||||
mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); \
|
||||
_CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
|
||||
return ret_val; \
|
||||
}
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
_CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
|
||||
|
||||
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
|
||||
_CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
|
||||
|
||||
|
||||
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
#undef _CLC_ELEMENT_CASES2
|
||||
#undef _CLC_ELEMENT_CASES4
|
||||
#undef _CLC_ELEMENT_CASES8
|
||||
#undef _CLC_ELEMENT_CASES16
|
||||
#undef _CLC_GET_ELEMENT_DEFINE
|
||||
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_DEFINE2
|
||||
#undef _CLC_SHUFFLE_DEFINE4
|
||||
#undef _CLC_SHUFFLE_DEFINE8
|
||||
#undef _CLC_SHUFFLE_DEFINE16
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
||||
#define __CLC_BODY <clc/misc/shuffle2_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
Loading…
x
Reference in New Issue
Block a user