mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 17:46:49 +00:00

This is fairly straightforward for most targets. We use the element-wise sqrt builtin by default. We also remove a legacy pre-filtering of the input argument, which the intrinsic now officially handles. AMDGPU provides its own implementation of sqrt for double types. This commit moves this into the implementation of CLC sqrt. It uses weak linkage on the 'default' CLC sqrt to allow AMDGPU to only override the builtin for the types it cares about.
63 lines
2.1 KiB
Common Lisp
63 lines
2.1 KiB
Common Lisp
/*
|
|
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
#include <clc/clcmacro.h>
|
|
#include <clc/internal/clc.h>
|
|
#include <clc/math/clc_fma.h>
|
|
#include <clc/math/clc_ldexp.h>
|
|
|
|
#ifdef cl_khr_fp64
|
|
|
|
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
|
|
|
// Pick the reciprocal-square-root builtin that __clc_sqrt calls: the amdgcn
// builtin when __AMDGCN__ is defined, the r600 IEEE variant otherwise.
#if defined(__AMDGCN__)
#define __clc_builtin_rsq __builtin_amdgcn_rsq
#else
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
#endif
|
|
|
|
_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
|
|
uint vcc = x < 0x1p-767;
|
|
uint exp0 = vcc ? 0x100 : 0;
|
|
unsigned exp1 = vcc ? 0xffffff80 : 0;
|
|
|
|
double v01 = __clc_ldexp(x, exp0);
|
|
double v23 = __clc_builtin_rsq(v01);
|
|
double v45 = v01 * v23;
|
|
v23 = v23 * 0.5;
|
|
|
|
double v67 = __clc_fma(-v23, v45, 0.5);
|
|
v45 = __clc_fma(v45, v67, v45);
|
|
double v89 = __clc_fma(-v45, v45, v01);
|
|
v23 = __clc_fma(v23, v67, v23);
|
|
v45 = __clc_fma(v89, v23, v45);
|
|
v67 = __clc_fma(-v45, v45, v01);
|
|
v23 = __clc_fma(v67, v23, v45);
|
|
|
|
v23 = __clc_ldexp(v23, exp1);
|
|
return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
|
|
}
|
|
|
|
// Instantiate the remaining double overloads of __clc_sqrt from the scalar
// definition above.
// NOTE(review): presumably this generates the vector-width variants by
// applying the scalar function per element -- confirm against clc/clcmacro.h.
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);
|
|
|
|
#endif
|