mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-27 13:36:07 +00:00
[X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss.
We don't need to insert the result back into the original vector at the end; the builtin already understands that. This is different from _mm_sqrt_sd, which takes two arguments, so there we do need the insertion. llvm-svn: 333572
This commit is contained in:
parent
c729c1b47d
commit
c5ec55e921
@ -224,8 +224,7 @@ _mm_div_ps(__m128 __a, __m128 __b)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_sqrt_ss(__m128 __a)
|
||||
{
|
||||
__m128 __c = __builtin_ia32_sqrtss((__v4sf)__a);
|
||||
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
|
||||
return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Calculates the square roots of the values stored in a 128-bit vector
|
||||
@ -260,8 +259,7 @@ _mm_sqrt_ps(__m128 __a)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_rcp_ss(__m128 __a)
|
||||
{
|
||||
__m128 __c = __builtin_ia32_rcpss((__v4sf)__a);
|
||||
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
|
||||
return (__m128)__builtin_ia32_rcpss((__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Calculates the approximate reciprocals of the values stored in a
|
||||
@ -278,7 +276,7 @@ _mm_rcp_ss(__m128 __a)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_rcp_ps(__m128 __a)
|
||||
{
|
||||
return __builtin_ia32_rcpps((__v4sf)__a);
|
||||
return (__m128)__builtin_ia32_rcpps((__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Calculates the approximate reciprocal of the square root of the value
|
||||
@ -297,8 +295,7 @@ _mm_rcp_ps(__m128 __a)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_rsqrt_ss(__m128 __a)
|
||||
{
|
||||
__m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a);
|
||||
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
|
||||
return __builtin_ia32_rsqrtss((__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Calculates the approximate reciprocals of the square roots of the
|
||||
|
@ -508,14 +508,6 @@ __m128 test_mm_rcp_ps(__m128 x) {
|
||||
__m128 test_mm_rcp_ss(__m128 x) {
|
||||
// CHECK-LABEL: test_mm_rcp_ss
|
||||
// CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 0
|
||||
// CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 1
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 2
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 3
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
|
||||
return _mm_rcp_ss(x);
|
||||
}
|
||||
|
||||
@ -528,14 +520,6 @@ __m128 test_mm_rsqrt_ps(__m128 x) {
|
||||
__m128 test_mm_rsqrt_ss(__m128 x) {
|
||||
// CHECK-LABEL: test_mm_rsqrt_ss
|
||||
// CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 0
|
||||
// CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 1
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 2
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 3
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
|
||||
return _mm_rsqrt_ss(x);
|
||||
}
|
||||
|
||||
@ -662,14 +646,6 @@ __m128 test_mm_sqrt_ps(__m128 x) {
|
||||
__m128 test_sqrt_ss(__m128 x) {
|
||||
// CHECK: define {{.*}} @test_sqrt_ss
|
||||
// CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 0
|
||||
// CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 1
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 2
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 3
|
||||
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
|
||||
return _mm_sqrt_ss(x);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user