mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 17:36:36 +00:00

This commit moves the shuffle and shuffle2 builtins to the CLC library. In doing so, it makes the headers simpler and reusable, so that other builtin layers can hook into the CLC functions if they wish. An additional gentype utility has been made available, which provides a consistent vector-size-or-1 macro. The existing __CLC_VECSIZE is defined but empty, which is useful in certain applications, such as concatenating it with a type name to make a correctly sized scalar or vector type. However, it cannot be used in preprocessor conditionals that check for specific vector sizes because, e.g., '__CLC_VECSIZE == 2' resolves to '== 2', which is invalid. In local testing the new macro is also useful for the geometric builtins, which are only available for scalar types and vector types of 2, 3, or 4 elements. No codegen changes are observed, except that the internal shuffle/shuffle2 utility functions are no longer made publicly available.
174 lines
9.7 KiB
Common Lisp
174 lines
9.7 KiB
Common Lisp
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include <clc/internal/clc.h>
|
|
|
|
// Switch-body fragment: for case labels 0 and 1, return component .s0 / .s1
// of VEC from the enclosing function.
#define _CLC_ELEMENT_CASES2(VEC) \
  case 0: return VEC.s0; \
  case 1: return VEC.s1;
|
|
|
|
// Switch-body fragment: the cases of _CLC_ELEMENT_CASES2 followed by case
// labels 2 and 3, which return components .s2 / .s3 of VEC.
#define _CLC_ELEMENT_CASES4(VEC) \
  _CLC_ELEMENT_CASES2(VEC) \
  case 2: return VEC.s2; \
  case 3: return VEC.s3;
|
|
|
|
// Switch-body fragment: the cases of _CLC_ELEMENT_CASES4 followed by case
// labels 4 through 7, which return components .s4 .. .s7 of VEC.
#define _CLC_ELEMENT_CASES8(VEC) \
  _CLC_ELEMENT_CASES4(VEC) \
  case 4: return VEC.s4; \
  case 5: return VEC.s5; \
  case 6: return VEC.s6; \
  case 7: return VEC.s7;
|
|
|
|
// Switch-body fragment: the cases of _CLC_ELEMENT_CASES8 followed by case
// labels 8 through 15, which return components .s8, .s9, .sA .. .sF of VEC.
#define _CLC_ELEMENT_CASES16(VEC) \
  _CLC_ELEMENT_CASES8(VEC) \
  case 8: return VEC.s8; \
  case 9: return VEC.s9; \
  case 10: return VEC.sA; \
  case 11: return VEC.sB; \
  case 12: return VEC.sC; \
  case 13: return VEC.sD; \
  case 14: return VEC.sE; \
  case 15: return VEC.sF;
|
|
|
|
// Defines the inline helper __clc_get_el_<ELT><WIDTH>_<IDX_T>(vec, lane).
// It returns the component of the WIDTH-element vector `vec` whose number
// equals `lane`, using the case list _CLC_ELEMENT_CASES<WIDTH>. A `lane`
// value with no matching case returns 0.
#define _CLC_GET_ELEMENT_DEFINE(ELT, WIDTH, IDX_T) \
  inline ELT __clc_get_el_##ELT##WIDTH##_##IDX_T(ELT##WIDTH vec, \
                                                 IDX_T lane) { \
    switch (lane) { \
      _CLC_ELEMENT_CASES##WIDTH(vec) \
    default: \
      return 0; \
    } \
  }
|
|
|
|
// Statement fragment: stores into ret_val.s<LANE> the element of `x` chosen
// by mask.s<LANE>, looked up through __clc_get_el_<ELT><WIDTH>_<IDX_T>.
// The names `ret_val`, `x` and `mask` are not parameters; they must already
// be in scope where the macro is expanded.
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, LANE, IDX_T) \
  ret_val.s##LANE = __clc_get_el_##ELT##WIDTH##_##IDX_T(x, mask.s##LANE);
|
|
|
|
// Statement fragment: fills ret_val.s0 and ret_val.s1, one lane at a time,
// via _CLC_SHUFFLE_SET_ONE_ELEMENT. Uses `ret_val`, `x` and `mask` from the
// expansion site.
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 0, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 1, IDX_T)
|
|
|
|
// Statement fragment: fills ret_val.s0 .. ret_val.s3. The first two lanes
// come from _CLC_SHUFFLE_SET_2_ELEMENTS, the remaining two from
// _CLC_SHUFFLE_SET_ONE_ELEMENT. Uses `ret_val`, `x` and `mask` from the
// expansion site.
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 2, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 3, IDX_T)
|
|
|
|
// Statement fragment: fills ret_val.s0 .. ret_val.s7. The first four lanes
// come from _CLC_SHUFFLE_SET_4_ELEMENTS, the remaining four from
// _CLC_SHUFFLE_SET_ONE_ELEMENT. Uses `ret_val`, `x` and `mask` from the
// expansion site.
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 4, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 5, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 6, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 7, IDX_T)
|
|
|
|
// Statement fragment: fills ret_val.s0 .. ret_val.sF. The first eight lanes
// come from _CLC_SHUFFLE_SET_8_ELEMENTS, lanes 8, 9 and A .. F from
// _CLC_SHUFFLE_SET_ONE_ELEMENT. Uses `ret_val`, `x` and `mask` from the
// expansion site.
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 8, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, 9, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, A, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, B, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, C, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, D, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, E, IDX_T) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ELT, WIDTH, F, IDX_T)
|
|
|
|
// Defines the __clc_shuffle overload that takes a WIDTH-element vector `x`
// and a 2-element mask and returns a 2-element vector. Every mask component
// is first bitwise-ANDed with WIDTH - 1; the masked value then selects the
// element of `x` copied into the matching result lane.
#define _CLC_SHUFFLE_DEFINE2(ELT, WIDTH, IDX_T) \
  _CLC_DEF _CLC_OVERLOAD ELT##2 __clc_shuffle(ELT##WIDTH x, IDX_T##2 mask) { \
    mask &= (IDX_T##2)(WIDTH - 1); \
    ELT##2 ret_val; \
    _CLC_SHUFFLE_SET_2_ELEMENTS(ELT, WIDTH, IDX_T) \
    return ret_val; \
  }
|
|
|
|
// Defines the __clc_shuffle overload that takes a WIDTH-element vector `x`
// and a 4-element mask and returns a 4-element vector. Every mask component
// is first bitwise-ANDed with WIDTH - 1; the masked value then selects the
// element of `x` copied into the matching result lane.
#define _CLC_SHUFFLE_DEFINE4(ELT, WIDTH, IDX_T) \
  _CLC_DEF _CLC_OVERLOAD ELT##4 __clc_shuffle(ELT##WIDTH x, IDX_T##4 mask) { \
    mask &= (IDX_T##4)(WIDTH - 1); \
    ELT##4 ret_val; \
    _CLC_SHUFFLE_SET_4_ELEMENTS(ELT, WIDTH, IDX_T) \
    return ret_val; \
  }
|
|
|
|
// Defines the __clc_shuffle overload that takes a WIDTH-element vector `x`
// and an 8-element mask and returns an 8-element vector. Every mask
// component is first bitwise-ANDed with WIDTH - 1; the masked value then
// selects the element of `x` copied into the matching result lane.
#define _CLC_SHUFFLE_DEFINE8(ELT, WIDTH, IDX_T) \
  _CLC_DEF _CLC_OVERLOAD ELT##8 __clc_shuffle(ELT##WIDTH x, IDX_T##8 mask) { \
    mask &= (IDX_T##8)(WIDTH - 1); \
    ELT##8 ret_val; \
    _CLC_SHUFFLE_SET_8_ELEMENTS(ELT, WIDTH, IDX_T) \
    return ret_val; \
  }
|
|
|
|
// Defines the __clc_shuffle overload that takes a WIDTH-element vector `x`
// and a 16-element mask and returns a 16-element vector. Every mask
// component is first bitwise-ANDed with WIDTH - 1; the masked value then
// selects the element of `x` copied into the matching result lane.
#define _CLC_SHUFFLE_DEFINE16(ELT, WIDTH, IDX_T) \
  _CLC_DEF _CLC_OVERLOAD ELT##16 __clc_shuffle(ELT##WIDTH x, \
                                               IDX_T##16 mask) { \
    mask &= (IDX_T##16)(WIDTH - 1); \
    ELT##16 ret_val; \
    _CLC_SHUFFLE_SET_16_ELEMENTS(ELT, WIDTH, IDX_T) \
    return ret_val; \
  }
|
|
|
|
// For one element type and one input width, emits the element-lookup helper
// followed by the four __clc_shuffle overloads whose mask (and result) have
// 2, 4, 8 and 16 elements.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(ELT, WIDTH, IDX_T) \
  _CLC_GET_ELEMENT_DEFINE(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_DEFINE2(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_DEFINE4(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_DEFINE8(ELT, WIDTH, IDX_T) \
  _CLC_SHUFFLE_DEFINE16(ELT, WIDTH, IDX_T)
|
|
|
|
// For one element type / mask type pair, expands
// _CLC_VECTOR_SHUFFLE_MASKSIZE once per input vector width: 2, 4, 8 and 16.
#define _CLC_VECTOR_SHUFFLE_INSIZE(ELT, IDX_T) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELT, 2, IDX_T) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELT, 4, IDX_T) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELT, 8, IDX_T) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELT, 16, IDX_T)
|
|
|
|
// Instantiate __clc_shuffle for every (element type, mask type) pair.

// Signed integer element types, each paired with an unsigned mask type.
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)

// Unsigned integer element types, which use themselves as the mask type.
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)

// Floating-point element types. double and half are only instantiated when
// the corresponding extension macro is defined.
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
#endif
|
|
|
|
// Remove every helper macro defined in this file now that all instantiations
// have been expanded.

// Element-lookup helpers.
#undef _CLC_ELEMENT_CASES2
#undef _CLC_ELEMENT_CASES4
#undef _CLC_ELEMENT_CASES8
#undef _CLC_ELEMENT_CASES16
#undef _CLC_GET_ELEMENT_DEFINE

// Per-lane assignment helpers.
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
#undef _CLC_SHUFFLE_SET_16_ELEMENTS

// Overload generators.
#undef _CLC_SHUFFLE_DEFINE2
#undef _CLC_SHUFFLE_DEFINE4
#undef _CLC_SHUFFLE_DEFINE8
#undef _CLC_SHUFFLE_DEFINE16

// Instantiation drivers.
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|