diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7a09822a1dd5..e2a680d73ab0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -172,6 +172,20 @@ AMDGPU Support
X86 Support
^^^^^^^^^^^
+- The MMX vector intrinsic functions from ``*mmintrin.h`` which
+  operate on ``__m64`` vectors, such as ``_mm_add_pi8``, have been
+  reimplemented to use the SSE2 instruction set and XMM registers
+  unconditionally. These intrinsics are therefore *no longer
+  supported* if MMX is enabled without SSE2 -- either by targeting
+  CPUs from the Pentium-MMX through the Pentium 3, or explicitly by
+  passing flags such as ``-mmmx -mno-sse2``.
+
+- The compiler builtins that formerly implemented the above MMX
+  intrinsic functions, such as ``__builtin_ia32_paddb``, have been
+  removed. Any code using these removed builtins should migrate to
+  the corresponding functions defined by the ``*mmintrin.h`` headers.
+  A mapping can be found in the file ``clang/www/builtins.py``.
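+
+  For example, a direct call to a removed builtin can be rewritten in
+  terms of the corresponding header intrinsic (an illustrative snippet;
+  ``add_bytes`` is not part of any real API):
+
+  .. code-block:: c
+
+     #include <mmintrin.h>
+
+     __m64 add_bytes(__m64 a, __m64 b) {
+       // Previously: return (__m64)__builtin_ia32_paddb((__v8qi)a, (__v8qi)b);
+       return _mm_add_pi8(a, b);
+     }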
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index a85e7918f4d7..06ca30d65f5b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -47,107 +47,8 @@ TARGET_BUILTIN(__builtin_ia32_writeeflags_u32, "vUi", "n", "")
// doesn't work in the presence of re-declaration of _mm_prefetch for windows.
TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx")
TARGET_BUILTIN(__builtin_ia32_emms, "v", "n", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pand, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pandn, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_por, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pxor, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllq, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlq, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1Oi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllqi, "V1OiV1Oii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1OiV1Oii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "nV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_movntq, "vV1Oi*V1Oi", "nV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2ii", "ncV:64:", "mmx")
-
-// MMX2 (MMX+SSE) intrinsics
-TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "ncV:64:", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "ncV:64:", "mmx,sse")
-
-// MMX+SSE2
-TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_paddq, "V1OiV1OiV1Oi", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1OiV2iV2i", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_psubq, "V1OiV1OiV1Oi", "ncV:64:", "mmx,sse2")
-
-// MMX+SSSE3
-TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "sV4sIi", "ncV:64:", "sse")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4ssIi", "ncV:64:", "sse")
// SSE intrinsics.
TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "ncV:128:", "sse")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f1dee801e4fe..0c4d0efb70ea 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14523,12 +14523,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// TODO: If we had a "freeze" IR instruction to generate a fixed undef
// value, we should use that here instead of a zero.
return llvm::Constant::getNullValue(ConvertType(E->getType()));
- case X86::BI__builtin_ia32_vec_init_v8qi:
- case X86::BI__builtin_ia32_vec_init_v4hi:
- case X86::BI__builtin_ia32_vec_init_v2si:
- return Builder.CreateBitCast(BuildVector(Ops),
- llvm::Type::getX86_MMXTy(getLLVMContext()));
- case X86::BI__builtin_ia32_vec_ext_v2si:
+ case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v8hi:
case X86::BI__builtin_ia32_vec_ext_v4si:
@@ -14546,6 +14541,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Otherwise we could just do this in the header file.
return Builder.CreateExtractElement(Ops[0], Index);
}
+ case X86::BI__builtin_ia32_vec_set_v4hi:
case X86::BI__builtin_ia32_vec_set_v16qi:
case X86::BI__builtin_ia32_vec_set_v8hi:
case X86::BI__builtin_ia32_vec_set_v4si:
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e85bfc47aa5c..a3176570a468 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -52,9 +52,12 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("sse2,no-evex512"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS_MMX \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
+
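+/* Helper macros used below: __trunc64(x) returns the low 64 bits of a
+   128-bit vector as an __m64; __anyext128(x) places an __m64 in the low 64
+   bits of an __m128i, leaving the upper 64 bits unspecified (the -1 shuffle
+   indices mark don't-care lanes). */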
+#define __trunc64(x) \
+ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __anyext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, -1, -1)
/// Adds lower double-precision values in both operands and returns the
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
@@ -1486,8 +1489,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) {
/// \param __a
/// A 128-bit vector of [2 x double].
/// \returns A 64-bit vector of [2 x i32] containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) {
- return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
+static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a) {
+ return __trunc64(__builtin_ia32_cvtpd2dq((__v2df)__a));
}
/// Converts the two double-precision floating-point elements of a
@@ -1505,8 +1508,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) {
/// \param __a
/// A 128-bit vector of [2 x double].
/// \returns A 64-bit vector of [2 x i32] containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) {
- return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
+static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a) {
+ return __trunc64(__builtin_ia32_cvttpd2dq((__v2df)__a));
}
/// Converts the two signed 32-bit integer elements of a 64-bit vector of
@@ -1520,8 +1523,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) {
/// \param __a
/// A 64-bit vector of [2 x i32].
/// \returns A 128-bit vector of [2 x double] containing the converted values.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) {
- return __builtin_ia32_cvtpi2pd((__v2si)__a);
+static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtpi32_pd(__m64 __a) {
+ return (__m128d) __builtin_convertvector((__v2si)__a, __v2df);
}
/// Returns the low-order element of a 128-bit vector of [2 x double] as
@@ -2108,9 +2111,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a,
/// \param __b
/// A 64-bit integer.
/// \returns A 64-bit integer containing the sum of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a,
- __m64 __b) {
- return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
+ return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b));
}
/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
@@ -2431,9 +2433,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a,
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a,
- __m64 __b) {
- return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
+ return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
+ (__v4si)__anyext128(__b)));
}
/// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2539,9 +2541,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a,
/// A 64-bit integer vector containing the subtrahend.
/// \returns A 64-bit integer vector containing the difference of the values in
/// the operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a,
- __m64 __b) {
- return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
+ return (__m64)((unsigned long long)__a - (unsigned long long)__b);
}
/// Subtracts the corresponding elements of two [2 x i64] vectors.
@@ -4889,8 +4890,10 @@ void _mm_pause(void);
#if defined(__cplusplus)
} // extern "C"
#endif
+
+#undef __anyext128
+#undef __trunc64
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_MMX
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 4e154e2d8593..9d1e135be63b 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -21,10 +21,33 @@ typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
+/* Unsigned types */
+typedef unsigned long long __v1du __attribute__ ((__vector_size__ (8)));
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+typedef unsigned short __v4hu __attribute__((__vector_size__(8)));
+typedef unsigned char __v8qu __attribute__((__vector_size__(8)));
+
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v8qs __attribute__((__vector_size__(8)));
+
+/* SSE/SSE2 types */
+typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__((__vector_size__(16)));
+typedef short __v8hi __attribute__((__vector_size__(16)));
+typedef char __v16qi __attribute__((__vector_size__(16)));
+
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
- __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS_SSE2 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sse2,no-evex512"), __min_vector_width__(128)))
+
+#define __trunc64(x) \
+ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __anyext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, -1, -1)
/// Clears the MMX state by setting the state of the x87 stack registers
/// to empty.
@@ -50,10 +73,10 @@ _mm_empty(void) {
/// A 32-bit integer value.
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
/// parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtsi32_si64(int __i)
{
- return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
+ return __extension__ (__m64)(__v2si){__i, 0};
}
/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
@@ -67,10 +90,10 @@ _mm_cvtsi32_si64(int __i)
/// A 64-bit integer vector.
/// \returns A 32-bit signed integer value containing the lower 32 bits of the
/// parameter.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2
_mm_cvtsi64_si32(__m64 __m)
{
- return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
+ return ((__v2si)__m)[0];
}
/// Casts a 64-bit signed integer value into a 64-bit integer vector.
@@ -83,7 +106,7 @@ _mm_cvtsi64_si32(__m64 __m)
/// A 64-bit signed integer.
/// \returns A 64-bit integer vector containing the same bitwise pattern as the
/// parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtsi64_m64(long long __i)
{
return (__m64)__i;
@@ -99,7 +122,7 @@ _mm_cvtsi64_m64(long long __i)
/// A 64-bit integer vector.
/// \returns A 64-bit signed integer containing the same bitwise pattern as the
/// parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
_mm_cvtm64_si64(__m64 __m)
{
return (long long)__m;
@@ -124,10 +147,11 @@ _mm_cvtm64_si64(__m64 __m)
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_packs_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
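+  /* Concatenate the two 64-bit operands into one 128-bit vector, pack it
+     with saturation against a zero vector (whose packed bytes fill the
+     unused upper half of the result), and keep the low 64 bits. */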
+ return __trunc64(__builtin_ia32_packsswb128(
+ (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
}
/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
@@ -149,10 +173,11 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_packs_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
+ return __trunc64(__builtin_ia32_packssdw128(
+ (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
}
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -174,10 +199,11 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_packs_pu16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
+ return __trunc64(__builtin_ia32_packuswb128(
+ (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
}
/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -201,10 +227,11 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
/// Bits [63:56] are written to bits [63:56] of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
+ 4, 12, 5, 13, 6, 14, 7, 15);
}
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -224,10 +251,11 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
/// Bits [63:48] are written to bits [63:48] of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
+ 2, 6, 3, 7);
}
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -245,10 +273,10 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
/// the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
+ return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -272,10 +300,11 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
/// Bits [31:24] are written to bits [63:56] of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
+ 0, 8, 1, 9, 2, 10, 3, 11);
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -295,10 +324,11 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
/// Bits [31:16] are written to bits [63:48] of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
+ 0, 4, 1, 5);
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -316,10 +346,10 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
/// the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
+ return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
}
/// Adds each 8-bit integer element of the first 64-bit integer vector
@@ -337,10 +367,10 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_add_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
}
/// Adds each 16-bit integer element of the first 64-bit integer vector
@@ -358,10 +388,10 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_add_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
}
/// Adds each 32-bit integer element of the first 64-bit integer vector
@@ -379,10 +409,10 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_add_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
+ return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
}
/// Adds, with saturation, each 8-bit signed integer element of the first
@@ -403,10 +433,10 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_adds_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
}
/// Adds, with saturation, each 16-bit signed integer element of the first
@@ -427,10 +457,10 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_adds_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
}
/// Adds, with saturation, each 8-bit unsigned integer element of the first
@@ -450,10 +480,10 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_adds_pu8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
}
/// Adds, with saturation, each 16-bit unsigned integer element of the first
@@ -473,10 +503,10 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_adds_pu16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
}
/// Subtracts each 8-bit integer element of the second 64-bit integer
@@ -494,10 +524,10 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sub_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
}
/// Subtracts each 16-bit integer element of the second 64-bit integer
@@ -515,10 +545,10 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
}
/// Subtracts each 32-bit integer element of the second 64-bit integer
@@ -536,10 +566,10 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
+ return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
}
/// Subtracts, with saturation, each 8-bit signed integer element of the second
@@ -560,10 +590,10 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_subs_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
}
/// Subtracts, with saturation, each 16-bit signed integer element of the
@@ -584,10 +614,10 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_subs_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
}
/// Subtracts each 8-bit unsigned integer element of the second 64-bit
@@ -608,10 +638,10 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_subs_pu8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
}
/// Subtracts each 16-bit unsigned integer element of the second 64-bit
@@ -632,10 +662,10 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_subs_pu16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
}
/// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -659,10 +689,11 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
/// products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_madd_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
+ return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
+ (__v8hi)__anyext128(__m2)));
}
/// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -680,10 +711,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
+ return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
+ (__v8hi)__anyext128(__m2)));
}
/// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -701,10 +733,10 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
}
/// Left-shifts each 16-bit signed integer element of the first
@@ -724,10 +756,11 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
/// values. If \a __count is greater or equal to 16, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sll_pi16(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
+ return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m),
+ (__v8hi)__anyext128(__count)));
}
/// Left-shifts each 16-bit signed integer element of a 64-bit integer
@@ -746,10 +779,11 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
/// values. If \a __count is greater or equal to 16, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_slli_pi16(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
+ return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
+ __count));
}
/// Left-shifts each 32-bit signed integer element of the first
@@ -769,10 +803,11 @@ _mm_slli_pi16(__m64 __m, int __count)
/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
/// values. If \a __count is greater or equal to 32, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sll_pi32(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
+ return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m),
+ (__v4si)__anyext128(__count)));
}
/// Left-shifts each 32-bit signed integer element of a 64-bit integer
@@ -791,10 +826,11 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
/// values. If \a __count is greater or equal to 32, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_slli_pi32(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
+ return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
+ __count));
}
/// Left-shifts the first 64-bit integer parameter by the number of bits
@@ -811,10 +847,11 @@ _mm_slli_pi32(__m64 __m, int __count)
/// A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector containing the left-shifted value. If
/// \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sll_si64(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
+ return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m),
+ (__v2di)__anyext128(__count)));
}
/// Left-shifts the first parameter, which is a 64-bit integer, by the
@@ -831,10 +868,11 @@ _mm_sll_si64(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the left-shifted value. If
/// \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_slli_si64(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
+ return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
+ __count));
}
/// Right-shifts each 16-bit integer element of the first parameter,
@@ -855,10 +893,11 @@ _mm_slli_si64(__m64 __m, int __count)
/// A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sra_pi16(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
+ return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m),
+ (__v8hi)__anyext128(__count)));
}
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
@@ -878,10 +917,11 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srai_pi16(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
+ return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
+ __count));
}
/// Right-shifts each 32-bit integer element of the first parameter,
@@ -902,10 +942,11 @@ _mm_srai_pi16(__m64 __m, int __count)
/// A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sra_pi32(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
+ return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m),
+ (__v4si)__anyext128(__count)));
}
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
@@ -925,10 +966,11 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srai_pi32(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
+ return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
+ __count));
}
/// Right-shifts each 16-bit integer element of the first parameter,
@@ -948,10 +990,11 @@ _mm_srai_pi32(__m64 __m, int __count)
/// A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srl_pi16(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
+ return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m),
+ (__v8hi)__anyext128(__count)));
}
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
@@ -970,10 +1013,11 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srli_pi16(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
+ return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
+ __count));
}
/// Right-shifts each 32-bit integer element of the first parameter,
@@ -993,10 +1037,11 @@ _mm_srli_pi16(__m64 __m, int __count)
/// A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srl_pi32(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
+ return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m),
+ (__v4si)__anyext128(__count)));
}
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
@@ -1015,10 +1060,11 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srli_pi32(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
+ return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
+ __count));
}
/// Right-shifts the first 64-bit integer parameter by the number of bits
@@ -1035,10 +1081,11 @@ _mm_srli_pi32(__m64 __m, int __count)
/// \param __count
/// A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srl_si64(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
+ return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m),
+ (__v2di)__anyext128(__count)));
}
/// Right-shifts the first parameter, which is a 64-bit integer, by the
@@ -1056,10 +1103,11 @@ _mm_srl_si64(__m64 __m, __m64 __count)
/// \param __count
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srli_si64(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
+ return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
+ __count));
}
/// Performs a bitwise AND of two 64-bit integer vectors.
@@ -1074,10 +1122,10 @@ _mm_srli_si64(__m64 __m, int __count)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_and_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
+ return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
}
/// Performs a bitwise NOT of the first 64-bit integer vector, and then
@@ -1095,10 +1143,10 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of the second
/// parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_andnot_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
+ return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
}
/// Performs a bitwise OR of two 64-bit integer vectors.
@@ -1113,10 +1161,10 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_or_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
+ return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
}
/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
@@ -1131,10 +1179,10 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_xor_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
+ return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1153,10 +1201,10 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
+ return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
}
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
@@ -1175,10 +1223,10 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
}
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1197,10 +1245,10 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
+ return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1219,10 +1267,12 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
+  /* This function always performs a signed comparison, but __v8qi is a
+     vector of char, which may be signed or unsigned, so use __v8qs. */
+ return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
}
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
@@ -1241,10 +1291,10 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
+ return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
}
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1263,10 +1313,10 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
- return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
+ return (__m64)((__v2si)__m1 > (__v2si)__m2);
}
/// Constructs a 64-bit integer vector initialized to zero.
@@ -1276,7 +1326,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
/// This intrinsic corresponds to the PXOR instruction.
///
/// \returns An initialized 64-bit integer vector with all elements set to zero.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setzero_si64(void)
{
return __extension__ (__m64){ 0LL };
@@ -1297,10 +1347,10 @@ _mm_setzero_si64(void)
/// A 32-bit integer value used to initialize the lower 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi32(int __i1, int __i0)
{
- return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
+ return __extension__ (__m64)(__v2si){__i0, __i1};
}
/// Constructs a 64-bit integer vector initialized with the specified
@@ -1320,10 +1370,10 @@ _mm_set_pi32(int __i1, int __i0)
/// \param __s0
/// A 16-bit integer value used to initialize bits [15:0] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
{
- return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
+ return __extension__ (__m64)(__v4hi){__s0, __s1, __s2, __s3};
}
/// Constructs a 64-bit integer vector initialized with the specified
@@ -1351,12 +1401,12 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
/// \param __b0
/// An 8-bit integer value used to initialize bits [7:0] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
char __b1, char __b0)
{
- return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
- __b4, __b5, __b6, __b7);
+ return __extension__ (__m64)(__v8qi){__b0, __b1, __b2, __b3,
+ __b4, __b5, __b6, __b7};
}
/// Constructs a 64-bit integer vector of [2 x i32], with each of the
@@ -1372,7 +1422,7 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
/// A 32-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [2 x i32].
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi32(int __i)
{
return _mm_set_pi32(__i, __i);
@@ -1391,7 +1441,7 @@ _mm_set1_pi32(int __i)
/// A 16-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [4 x i16].
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi16(short __w)
{
return _mm_set_pi16(__w, __w, __w, __w);
@@ -1409,7 +1459,7 @@ _mm_set1_pi16(short __w)
/// An 8-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [8 x i8].
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi8(char __b)
{
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
@@ -1430,7 +1480,7 @@ _mm_set1_pi8(char __b)
/// A 32-bit integer value used to initialize the upper 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi32(int __i0, int __i1)
{
return _mm_set_pi32(__i1, __i0);
@@ -1453,7 +1503,7 @@ _mm_setr_pi32(int __i0, int __i1)
/// \param __w3
/// A 16-bit integer value used to initialize bits [63:48] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16(__w3, __w2, __w1, __w0);
@@ -1484,14 +1534,16 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
/// \param __b7
/// An 8-bit integer value used to initialize bits [63:56] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7)
{
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}
-#undef __DEFAULT_FN_ATTRS
+#undef __anyext128
+#undef __trunc64
+#undef __DEFAULT_FN_ATTRS_SSE2
/* Aliases for compatibility. */
#define _m_empty _mm_empty
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index bf8327b692d1..bd832ce8dddf 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -19,11 +19,13 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
- __target__("ssse3,no-evex512"), __min_vector_width__(64)))
-#define __DEFAULT_FN_ATTRS_MMX \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("mmx,ssse3,no-evex512"), \
- __min_vector_width__(64)))
+ __target__("ssse3,no-evex512"), __min_vector_width__(128)))
+
+#define __trunc64(x) \
+ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __anyext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, -1, -1)
/// Computes the absolute value of each of the packed 8-bit signed
/// integers in the source operand and stores the 8-bit unsigned integer
@@ -37,10 +39,10 @@
/// A 64-bit vector of [8 x i8].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi8(__m64 __a)
{
- return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
+ return (__m64)__builtin_elementwise_abs((__v8qs)__a);
}
/// Computes the absolute value of each of the packed 8-bit signed
@@ -73,10 +75,10 @@ _mm_abs_epi8(__m128i __a)
/// A 64-bit vector of [4 x i16].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi16(__m64 __a)
{
- return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
+ return (__m64)__builtin_elementwise_abs((__v4hi)__a);
}
/// Computes the absolute value of each of the packed 16-bit signed
@@ -109,10 +111,10 @@ _mm_abs_epi16(__m128i __a)
/// A 64-bit vector of [2 x i32].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi32(__m64 __a)
{
- return (__m64)__builtin_ia32_pabsd((__v2si)__a);
+ return (__m64)__builtin_elementwise_abs((__v2si)__a);
}
/// Computes the absolute value of each of the packed 32-bit signed
@@ -177,7 +179,10 @@ _mm_abs_epi32(__m128i __a)
/// \returns A 64-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_pi8(a, b, n) \
- ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
+ ((__m64)__builtin_shufflevector( \
+ __builtin_ia32_psrldqi128_byteshift( \
+ __builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), \
+ (n)), __extension__ (__v2di){}, 0))
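+/* The inner shuffle forms the 128-bit value {b (low 64 bits), a (high 64
+   bits)}, psrldqi128_byteshift shifts it right by n bytes, and the outer
+   shufflevector returns the low 64 bits of the shifted value as an __m64. */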
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
@@ -242,10 +247,11 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_phaddw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -265,10 +271,11 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
/// destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
+ return __trunc64(__builtin_ia32_phaddd128(
+ (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
}
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -317,10 +324,11 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_phaddsw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -386,10 +394,11 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_phsubw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -409,10 +418,11 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
+ return __trunc64(__builtin_ia32_phsubd128(
+ (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
}
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -461,10 +471,11 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_phsubsw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -525,10 +536,11 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
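+ // Widen both operands to 128 bits (upper half undefined), use the 128-bit form of the instruction, and keep only the low 64 bits.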
+ return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
+ (__v16qi)__anyext128(__b)));
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -565,10 +577,11 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
/// A 64-bit vector of [4 x i16] containing one of the source operands.
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
/// products of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a),
+ (__v8hi)__anyext128(__b)));
}
/// Copies the 8-bit integers from a 128-bit integer vector to the
@@ -614,12 +627,15 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
/// 1: Clear the corresponding byte in the destination. \n
/// 0: Copy the selected source byte to the corresponding byte in the
/// destination. \n
-/// Bits [3:0] select the source byte to be copied.
+/// Bits [2:0] select the source byte to be copied.
/// \returns A 64-bit integer vector containing the copied or cleared values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
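+ // __a is duplicated into both halves of the 128-bit source, so bit 3 of each control byte (honored by pshufb.128 but ignored by the 64-bit form) selects the same data either way.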
+ return __trunc64(__builtin_ia32_pshufb128(
+ (__v16qi)__builtin_shufflevector(
+ (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1),
+ (__v16qi)__anyext128(__b)));
}
/// For each 8-bit integer in the first source operand, perform one of
@@ -720,10 +736,11 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
/// A 64-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi8(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
+ return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
+ (__v16qi)__anyext128(__b)));
}
/// For each 16-bit integer in the first source operand, perform one of
@@ -746,10 +763,11 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
+ (__v8hi)__anyext128(__b)));
}
/// For each 32-bit integer in the first source operand, perform one of
@@ -772,13 +790,15 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing two control doublewords corresponding
/// to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi32(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
+ return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
+ (__v4si)__anyext128(__b)));
}
+#undef __anyext128
+#undef __trunc64
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_MMX
#endif /* __TMMINTRIN_H */
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 1ef89de9c9f5..5b9e90e8f061 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -35,9 +35,21 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \
__min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS_MMX \
+#define __DEFAULT_FN_ATTRS_SSE2 \
__attribute__((__always_inline__, __nodebug__, \
- __target__("mmx,sse,no-evex512"), __min_vector_width__(64)))
+ __target__("sse2,no-evex512"), __min_vector_width__(128)))
+
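+/* Helper macros used by the MMX-on-SSE2 implementation:
+   __trunc64(x)     - take the low 64 bits of a 128-bit vector as an __m64
+   __zext128(x)     - widen a 64-bit vector to 128 bits, zeroing the upper half
+   __anyext128(x)   - widen a 64-bit vector to 128 bits, upper half undefined
+   __zeroupper64(x) - clear the upper 64 bits of a 128-bit vector */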
+#define __trunc64(x) \
+ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, 2, 3)
+#define __anyext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, -1, -1)
+#define __zeroupper64(x) \
+ (__m128i) __builtin_shufflevector((__v4si)(x), __extension__(__v4si){}, 0, \
+ 1, 4, 5)
/// Adds the 32-bit float values in the low-order bits of the operands.
///
@@ -1448,10 +1460,10 @@ _mm_cvtss_si64(__m128 __a)
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtps_pi32(__m128 __a)
{
- return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);
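+ // The upper two lanes are zeroed first; converting zeros there cannot raise spurious floating-point exceptions.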
+ return __trunc64(__builtin_ia32_cvtps2dq((__v4sf)__zeroupper64(__a)));
}
/// Converts two low-order float values in a 128-bit vector of
@@ -1468,7 +1480,7 @@ _mm_cvtps_pi32(__m128 __a)
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvt_ps2pi(__m128 __a)
{
return _mm_cvtps_pi32(__a);
@@ -1558,10 +1570,10 @@ _mm_cvttss_si64(__m128 __a)
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvttps_pi32(__m128 __a)
{
- return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
+ return __trunc64(__builtin_ia32_cvttps2dq((__v4sf)__zeroupper64(__a)));
}
/// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
@@ -1579,7 +1591,7 @@ _mm_cvttps_pi32(__m128 __a)
/// \param __a
/// A 128-bit vector of [4 x float].
/// \returns A 64-bit integer vector containing the converted values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtt_ps2pi(__m128 __a)
{
return _mm_cvttps_pi32(__a);
@@ -1674,10 +1686,13 @@ _mm_cvtsi64_ss(__m128 __a, long long __b)
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted value of the second operand. The upper 64 bits are copied from
/// the upper 64 bits of the first operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtpi32_ps(__m128 __a, __m64 __b)
{
- return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);
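+ // The two converted values replace the low two lanes of __a; the upper two lanes of __a are preserved.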
+ return (__m128)__builtin_shufflevector(
+ (__v4sf)__a,
+ __builtin_convertvector((__v4si)__zext128(__b), __v4sf),
+ 4, 5, 2, 3);
}
/// Converts two elements of a 64-bit vector of [2 x i32] into two
@@ -1697,7 +1712,7 @@ _mm_cvtpi32_ps(__m128 __a, __m64 __b)
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted value from the second operand. The upper 64 bits are copied
/// from the upper 64 bits of the first operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvt_pi2ps(__m128 __a, __m64 __b)
{
return _mm_cvtpi32_ps(__a, __b);
@@ -2231,10 +2246,10 @@ _mm_storer_ps(float *__p, __m128 __a)
/// A pointer to an aligned memory location used to store the register value.
/// \param __a
/// A 64-bit integer containing the value to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS_MMX
+static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_pi(void *__p, __m64 __a)
{
- __builtin_ia32_movntq((__m64 *)__p, __a);
+ __builtin_nontemporal_store(__a, (__m64 *)__p);
}
/// Moves packed float values from a 128-bit vector of [4 x float] to a
@@ -2296,7 +2311,7 @@ void _mm_sfence(void);
/// 3: Bits [63:48] are copied to the destination.
/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
#define _mm_extract_pi16(a, n) \
- ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
+ ((int)(unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2342,10 +2357,10 @@ void _mm_sfence(void);
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_max_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
+ return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b);
}
/// Compares each of the corresponding packed 8-bit unsigned integer
@@ -2361,10 +2376,10 @@ _mm_max_pi16(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_max_pu8(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
+ return (__m64)__builtin_elementwise_max((__v8qu)__a, (__v8qu)__b);
}
/// Compares each of the corresponding packed 16-bit integer values of
@@ -2380,10 +2395,10 @@ _mm_max_pu8(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_min_pi16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
+ return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b);
}
/// Compares each of the corresponding packed 8-bit unsigned integer
@@ -2399,10 +2414,10 @@ _mm_min_pi16(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_min_pu8(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
+ return (__m64)__builtin_elementwise_min((__v8qu)__a, (__v8qu)__b);
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
@@ -2417,10 +2432,10 @@ _mm_min_pu8(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing the values with bits to be extracted.
/// \returns The most significant bit from each 8-bit element in \a __a,
/// written to bits [7:0].
-static __inline__ int __DEFAULT_FN_ATTRS_MMX
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2
_mm_movemask_pi8(__m64 __a)
{
- return __builtin_ia32_pmovmskb((__v8qi)__a);
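+ // __a is zero-extended to 128 bits, so the upper eight lanes contribute zero bits to the mask.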
+ return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a));
}
/// Multiplies packed 16-bit unsigned integer values and writes the
@@ -2436,10 +2451,11 @@ _mm_movemask_pi8(__m64 __a)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the products of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_mulhi_pu16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a),
+ (__v8hi)__anyext128(__b)));
}
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
@@ -2476,8 +2492,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form
/// [b6, b4, b2, b0].
/// \returns A 64-bit integer vector containing the shuffled values.
-#define _mm_shuffle_pi16(a, n) \
- ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
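+// Each 2-bit field of the immediate selects the source element for the corresponding destination lane; the shuffle is decoded at compile time.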
+#define _mm_shuffle_pi16(a, n) \
+ ((__m64)__builtin_shufflevector((__v4hi)(__m64)(a), __extension__(__v4hi){}, \
+ (n) & 0x3, ((n) >> 2) & 0x3, \
+ ((n) >> 4) & 0x3, ((n) >> 6) & 0x3))
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
@@ -2502,10 +2520,25 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
/// A pointer to a 64-bit memory location that will receive the conditionally
/// copied integer values. The address of the memory location does not have
/// to be aligned.
-static __inline__ void __DEFAULT_FN_ATTRS_MMX
+static __inline__ void __DEFAULT_FN_ATTRS_SSE2
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
{
- __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
+ // This is complex, because we need to support the case where __p is pointing
+ // within the last 15 to 8 bytes of a page. In that case, using a 128-bit
+ // write might cause a trap where a 64-bit maskmovq would not. (Memory
+ // locations not selected by the mask bits might still cause traps.)
+ __m128i __d128 = __anyext128(__d);
+ __m128i __n128 = __zext128(__n);
+ if (((__SIZE_TYPE__)__p & 0xfff) >= 4096-15 &&
+ ((__SIZE_TYPE__)__p & 0xfff) <= 4096-8) {
+ // If there's a risk of spurious trap due to a 128-bit write, back up the
+ // pointer by 8 bytes and shift values in registers to match.
+ __p -= 8;
+ __d128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__d128, 8);
+ __n128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__n128, 8);
+ }
+
+ __builtin_ia32_maskmovdqu((__v16qi)__d128, (__v16qi)__n128, __p);
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
@@ -2521,10 +2554,11 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_avg_pu8(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
+ return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
+ (__v16qi)__anyext128(__b)));
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
@@ -2540,10 +2574,11 @@ _mm_avg_pu8(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_avg_pu16(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
+ return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
+ (__v8hi)__anyext128(__b)));
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
@@ -2562,10 +2597,11 @@ _mm_avg_pu16(__m64 __a, __m64 __b)
/// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the
/// sets of absolute differences between both operands. The upper bits are
/// cleared.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sad_pu8(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
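+ // Both operands are zero-extended to 128 bits; only the low 64 bits of the psadbw result are kept.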
+ return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a),
+ (__v16qi)__zext128(__b)));
}
#if defined(__cplusplus)
@@ -2846,22 +2882,10 @@ _mm_movelh_ps(__m128 __a, __m128 __b)
/// from the corresponding elements in this operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtpi16_ps(__m64 __a)
{
- __m64 __b, __c;
- __m128 __r;
-
- __b = _mm_setzero_si64();
- __b = _mm_cmpgt_pi16(__b, __a);
- __c = _mm_unpackhi_pi16(__a, __b);
- __r = _mm_setzero_ps();
- __r = _mm_cvtpi32_ps(__r, __c);
- __r = _mm_movelh_ps(__r, __r);
- __c = _mm_unpacklo_pi16(__a, __b);
- __r = _mm_cvtpi32_ps(__r, __c);
-
- return __r;
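+ // A single signed 16-bit integer to float vector conversion replaces the old unpack/convert/movelh sequence.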
+ return __builtin_convertvector((__v4hi)__a, __v4sf);
}
/// Converts a 64-bit vector of 16-bit unsigned integer values into a
@@ -2876,21 +2900,10 @@ _mm_cvtpi16_ps(__m64 __a)
/// destination are copied from the corresponding elements in this operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtpu16_ps(__m64 __a)
{
- __m64 __b, __c;
- __m128 __r;
-
- __b = _mm_setzero_si64();
- __c = _mm_unpackhi_pi16(__a, __b);
- __r = _mm_setzero_ps();
- __r = _mm_cvtpi32_ps(__r, __c);
- __r = _mm_movelh_ps(__r, __r);
- __c = _mm_unpacklo_pi16(__a, __b);
- __r = _mm_cvtpi32_ps(__r, __c);
-
- return __r;
+ return __builtin_convertvector((__v4hu)__a, __v4sf);
}
/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
@@ -2905,16 +2918,12 @@ _mm_cvtpu16_ps(__m64 __a)
/// from the corresponding lower 4 elements in this operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtpi8_ps(__m64 __a)
{
- __m64 __b;
-
- __b = _mm_setzero_si64();
- __b = _mm_cmpgt_pi8(__b, __a);
- __b = _mm_unpacklo_pi8(__a, __b);
-
- return _mm_cvtpi16_ps(__b);
+ return __builtin_convertvector(
+ __builtin_shufflevector((__v8qs)__a, __extension__ (__v8qs){},
+ 0, 1, 2, 3), __v4sf);
}
/// Converts the lower four unsigned 8-bit integer values from a 64-bit
@@ -2930,15 +2939,12 @@ _mm_cvtpi8_ps(__m64 __a)
/// operand.
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
/// values from the source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtpu8_ps(__m64 __a)
{
- __m64 __b;
-
- __b = _mm_setzero_si64();
- __b = _mm_unpacklo_pi8(__a, __b);
-
- return _mm_cvtpi16_ps(__b);
+ return __builtin_convertvector(
+ __builtin_shufflevector((__v8qu)__a, __extension__ (__v8qu){},
+ 0, 1, 2, 3), __v4sf);
}
/// Converts the two 32-bit signed integer values from each 64-bit vector
@@ -2957,16 +2963,12 @@ _mm_cvtpu8_ps(__m64 __a)
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// copied and converted values from the first operand. The upper 64 bits
/// contain the copied and converted values from the second operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
{
- __m128 __c;
-
- __c = _mm_setzero_ps();
- __c = _mm_cvtpi32_ps(__c, __b);
- __c = _mm_movelh_ps(__c, __c);
-
- return _mm_cvtpi32_ps(__c, __a);
+ return __builtin_convertvector(
+ __builtin_shufflevector((__v2si)__a, (__v2si)__b,
+ 0, 1, 2, 3), __v4sf);
}
/// Converts each single-precision floating-point element of a 128-bit
@@ -2986,16 +2988,11 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
/// A 128-bit floating-point vector of [4 x float].
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtps_pi16(__m128 __a)
{
- __m64 __b, __c;
-
- __b = _mm_cvtps_pi32(__a);
- __a = _mm_movehl_ps(__a, __a);
- __c = _mm_cvtps_pi32(__a);
-
- return _mm_packs_pi32(__b, __c);
+ return __trunc64(__builtin_ia32_packssdw128(
+ (__v4si)__builtin_ia32_cvtps2dq((__v4sf)__a), (__v4si)_mm_setzero_ps()));
}
/// Converts each single-precision floating-point element of a 128-bit
@@ -3016,7 +3013,7 @@ _mm_cvtps_pi16(__m128 __a)
/// 128-bit floating-point vector of [4 x float].
/// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the
/// converted values and the upper 32 bits are set to zero.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_cvtps_pi8(__m128 __a)
{
__m64 __b, __c;
@@ -3196,8 +3193,12 @@ do { \
#define _m_psadbw _mm_sad_pu8
#define _m_ _mm_
+#undef __trunc64
+#undef __zext128
+#undef __anyext128
+#undef __zeroupper64
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_MMX
+#undef __DEFAULT_FN_ATTRS_SSE2
/* Ugly hack for backwards-compatibility (compatible with gcc) */
#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index be26454ce909..8f9057bbaf25 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -502,7 +502,6 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
switch (BuiltinID) {
default:
return false;
- case X86::BI__builtin_ia32_vec_ext_v2si:
case X86::BI__builtin_ia32_vec_ext_v2di:
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 5b5bc301bddc..495ae7e18115 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -1,193 +1,200 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx
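+// --implicit-check-not=x86mmx makes FileCheck reject any remaining use of the legacy x86mmx type in the generated IR.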
#include <immintrin.h>
__m64 test_mm_abs_pi8(__m64 a) {
// CHECK-LABEL: test_mm_abs_pi8
- // CHECK: call x86_mmx @llvm.x86.ssse3.pabs.b
+ // CHECK: call <8 x i8> @llvm.abs.v8i8(
return _mm_abs_pi8(a);
}
__m64 test_mm_abs_pi16(__m64 a) {
// CHECK-LABEL: test_mm_abs_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.pabs.w
+ // CHECK: call <4 x i16> @llvm.abs.v4i16(
return _mm_abs_pi16(a);
}
__m64 test_mm_abs_pi32(__m64 a) {
// CHECK-LABEL: test_mm_abs_pi32
- // CHECK: call x86_mmx @llvm.x86.ssse3.pabs.d
+ // CHECK: call <2 x i32> @llvm.abs.v2i32(
return _mm_abs_pi32(a);
}
__m64 test_mm_add_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.padd.b
+ // CHECK: add <8 x i8> {{%.*}}, {{%.*}}
return _mm_add_pi8(a, b);
}
__m64 test_mm_add_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.padd.w
+ // CHECK: add <4 x i16> {{%.*}}, {{%.*}}
return _mm_add_pi16(a, b);
}
__m64 test_mm_add_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.padd.d
+ // CHECK: add <2 x i32> {{%.*}}, {{%.*}}
return _mm_add_pi32(a, b);
}
__m64 test_mm_add_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+ // CHECK: add i64 {{%.*}}, {{%.*}}
return _mm_add_si64(a, b);
}
__m64 test_mm_adds_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.padds.b
+ // CHECK: call <8 x i8> @llvm.sadd.sat.v8i8(
return _mm_adds_pi8(a, b);
}
__m64 test_mm_adds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.padds.w
+ // CHECK: call <4 x i16> @llvm.sadd.sat.v4i16(
return _mm_adds_pi16(a, b);
}
__m64 test_mm_adds_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pu8
- // CHECK: call x86_mmx @llvm.x86.mmx.paddus.b
+ // CHECK: call <8 x i8> @llvm.uadd.sat.v8i8(
return _mm_adds_pu8(a, b);
}
__m64 test_mm_adds_pu16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pu16
- // CHECK: call x86_mmx @llvm.x86.mmx.paddus.w
+ // CHECK: call <4 x i16> @llvm.uadd.sat.v4i16(
return _mm_adds_pu16(a, b);
}
__m64 test_mm_alignr_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_alignr_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.palignr.b
+ // CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> zeroinitializer, <16 x i32>
return _mm_alignr_pi8(a, b, 2);
}
__m64 test_mm_and_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_and_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.pand
+ // CHECK: and <1 x i64> {{%.*}}, {{%.*}}
return _mm_and_si64(a, b);
}
__m64 test_mm_andnot_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_andnot_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.pandn
+ // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}},
+ // CHECK: and <1 x i64> [[TMP]], {{%.*}}
return _mm_andnot_si64(a, b);
}
__m64 test_mm_avg_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_avg_pu8
- // CHECK: call x86_mmx @llvm.x86.mmx.pavg.b
+ // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(
return _mm_avg_pu8(a, b);
}
__m64 test_mm_avg_pu16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_avg_pu16
- // CHECK: call x86_mmx @llvm.x86.mmx.pavg.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(
return _mm_avg_pu16(a, b);
}
__m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.pcmpeq.b
+ // CHECK: [[CMP:%.*]] = icmp eq <8 x i8> {{%.*}}, {{%.*}}
+ // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
return _mm_cmpeq_pi8(a, b);
}
__m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pcmpeq.w
+ // CHECK: [[CMP:%.*]] = icmp eq <4 x i16> {{%.*}}, {{%.*}}
+ // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
return _mm_cmpeq_pi16(a, b);
}
__m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.pcmpeq.d
+ // CHECK: [[CMP:%.*]] = icmp eq <2 x i32> {{%.*}}, {{%.*}}
+ // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
return _mm_cmpeq_pi32(a, b);
}
__m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.pcmpgt.b
+ // CHECK: [[CMP:%.*]] = icmp sgt <8 x i8> {{%.*}}, {{%.*}}
+ // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
return _mm_cmpgt_pi8(a, b);
}
__m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pcmpgt.w
+ // CHECK: [[CMP:%.*]] = icmp sgt <4 x i16> {{%.*}}, {{%.*}}
+ // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
return _mm_cmpgt_pi16(a, b);
}
__m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.pcmpgt.d
+ // CHECK: [[CMP:%.*]] = icmp sgt <2 x i32> {{%.*}}, {{%.*}}
+ // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
return _mm_cmpgt_pi32(a, b);
}
__m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
// CHECK-LABEL: test_mm_cvt_pi2ps
- // CHECK: <4 x float> @llvm.x86.sse.cvtpi2ps
+ // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
return _mm_cvt_pi2ps(a, b);
}
__m64 test_mm_cvt_ps2pi(__m128 a) {
// CHECK-LABEL: test_mm_cvt_ps2pi
- // CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+ // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(
return _mm_cvt_ps2pi(a);
}
__m64 test_mm_cvtpd_pi32(__m128d a) {
// CHECK-LABEL: test_mm_cvtpd_pi32
- // CHECK: call x86_mmx @llvm.x86.sse.cvtpd2pi
+ // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(
return _mm_cvtpd_pi32(a);
}
__m128 test_mm_cvtpi16_ps(__m64 a) {
// CHECK-LABEL: test_mm_cvtpi16_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+ // CHECK: sitofp <4 x i16> {{%.*}} to <4 x float>
return _mm_cvtpi16_ps(a);
}
__m128d test_mm_cvtpi32_pd(__m64 a) {
// CHECK-LABEL: test_mm_cvtpi32_pd
- // CHECK: call <2 x double> @llvm.x86.sse.cvtpi2pd
+ // CHECK: sitofp <2 x i32> {{%.*}} to <2 x double>
return _mm_cvtpi32_pd(a);
}
__m128 test_mm_cvtpi32_ps(__m128 a, __m64 b) {
// CHECK-LABEL: test_mm_cvtpi32_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+ // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
return _mm_cvtpi32_ps(a, b);
}
__m128 test_mm_cvtpi32x2_ps(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cvtpi32x2_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
- // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+ // CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
return _mm_cvtpi32x2_ps(a, b);
}
__m64 test_mm_cvtps_pi16(__m128 a) {
// CHECK-LABEL: test_mm_cvtps_pi16
- // CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+ // CHECK: [[TMP0:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> {{%.*}})
+ // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP0]],
return _mm_cvtps_pi16(a);
}
__m64 test_mm_cvtps_pi32(__m128 a) {
// CHECK-LABEL: test_mm_cvtps_pi32
- // CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+ // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(
return _mm_cvtps_pi32(a);
}
@@ -205,19 +212,19 @@ int test_mm_cvtsi64_si32(__m64 a) {
__m64 test_mm_cvttpd_pi32(__m128d a) {
// CHECK-LABEL: test_mm_cvttpd_pi32
- // CHECK: call x86_mmx @llvm.x86.sse.cvttpd2pi
+ // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(
return _mm_cvttpd_pi32(a);
}
__m64 test_mm_cvttps_pi32(__m128 a) {
// CHECK-LABEL: test_mm_cvttps_pi32
- // CHECK: call x86_mmx @llvm.x86.sse.cvttps2pi
+ // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(
return _mm_cvttps_pi32(a);
}
int test_mm_extract_pi16(__m64 a) {
// CHECK-LABEL: test_mm_extract_pi16
- // CHECK: call i32 @llvm.x86.mmx.pextr.w
+ // CHECK: extractelement <4 x i16> {{%.*}}, i64 2
return _mm_extract_pi16(a, 2);
}
@@ -234,151 +241,153 @@ __m64 test_m_from_int64(long long a) {
__m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.phadd.w
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(
return _mm_hadd_pi16(a, b);
}
__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi32
- // CHECK: call x86_mmx @llvm.x86.ssse3.phadd.d
+ // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(
return _mm_hadd_pi32(a, b);
}
__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadds_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.phadd.sw
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
return _mm_hadds_pi16(a, b);
}
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.phsub.w
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
return _mm_hsub_pi16(a, b);
}
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi32
- // CHECK: call x86_mmx @llvm.x86.ssse3.phsub.d
+ // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
return _mm_hsub_pi32(a, b);
}
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsubs_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.phsub.sw
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
return _mm_hsubs_pi16(a, b);
}
__m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK-LABEL: test_mm_insert_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pinsr.w
+ // CHECK: insertelement <4 x i16>
return _mm_insert_pi16(a, d, 2);
}
__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_madd_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pmadd.wd
+ // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(
return _mm_madd_pi16(a, b);
}
__m64 test_mm_maddubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_maddubs_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(
return _mm_maddubs_pi16(a, b);
}
void test_mm_maskmove_si64(__m64 d, __m64 n, char *p) {
// CHECK-LABEL: test_mm_maskmove_si64
- // CHECK: call void @llvm.x86.mmx.maskmovq
+ // CHECK: call void @llvm.x86.sse2.maskmov.dqu(
_mm_maskmove_si64(d, n, p);
}
__m64 test_mm_max_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_max_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pmaxs.w
+ // CHECK: call <4 x i16> @llvm.smax.v4i16(
return _mm_max_pi16(a, b);
}
__m64 test_mm_max_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_max_pu8
- // CHECK: call x86_mmx @llvm.x86.mmx.pmaxu.b
+ // CHECK: call <8 x i8> @llvm.umax.v8i8(
return _mm_max_pu8(a, b);
}
__m64 test_mm_min_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_min_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pmins.w
+ // CHECK: call <4 x i16> @llvm.smin.v4i16(
return _mm_min_pi16(a, b);
}
__m64 test_mm_min_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_min_pu8
- // CHECK: call x86_mmx @llvm.x86.mmx.pminu.b
+ // CHECK: call <8 x i8> @llvm.umin.v8i8(
return _mm_min_pu8(a, b);
}
int test_mm_movemask_pi8(__m64 a) {
// CHECK-LABEL: test_mm_movemask_pi8
- // CHECK: call i32 @llvm.x86.mmx.pmovmskb
+ // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(
return _mm_movemask_pi8(a);
}
__m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mul_su32
- // CHECK: call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+ // CHECK: and <2 x i64> {{%.*}},
+ // CHECK: and <2 x i64> {{%.*}},
+ // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_su32(a, b);
}
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pmulh.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(
return _mm_mulhi_pi16(a, b);
}
__m64 test_mm_mulhi_pu16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pu16
- // CHECK: call x86_mmx @llvm.x86.mmx.pmulhu.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(
return _mm_mulhi_pu16(a, b);
}
__m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhrs_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.pmul.hr.sw
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(
return _mm_mulhrs_pi16(a, b);
}
__m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mullo_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pmull.w
+ // CHECK: mul <4 x i16> {{%.*}}, {{%.*}}
return _mm_mullo_pi16(a, b);
}
__m64 test_mm_or_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_or_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.por
+ // CHECK: or <1 x i64> {{%.*}}, {{%.*}}
return _mm_or_si64(a, b);
}
__m64 test_mm_packs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_packs_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.packsswb
+ // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(
return _mm_packs_pi16(a, b);
}
__m64 test_mm_packs_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_packs_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.packssdw
+ // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(
return _mm_packs_pi32(a, b);
}
__m64 test_mm_packs_pu16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_packs_pu16
- // CHECK: call x86_mmx @llvm.x86.mmx.packuswb
+ // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(
return _mm_packs_pu16(a, b);
}
__m64 test_mm_sad_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sad_pu8
- // CHECK: call x86_mmx @llvm.x86.mmx.psad.bw
+ // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>
return _mm_sad_pu8(a, b);
}
@@ -471,187 +480,187 @@ __m64 test_mm_set1_pi32(int a) {
__m64 test_mm_shuffle_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_shuffle_pi8
- // CHECK: call x86_mmx @llvm.x86.ssse3.pshuf.b
+ // CHECK: call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(
return _mm_shuffle_pi8(a, b);
}
__m64 test_mm_shuffle_pi16(__m64 a) {
// CHECK-LABEL: test_mm_shuffle_pi16
- // CHECK: call x86_mmx @llvm.x86.sse.pshuf.w
+ // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32>
return _mm_shuffle_pi16(a, 3);
}
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi8
- // CHECK: call x86_mmx @llvm.x86.ssse3.psign.b
+ // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
return _mm_sign_pi8(a, b);
}
__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi16
- // CHECK: call x86_mmx @llvm.x86.ssse3.psign.w
+ // CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
return _mm_sign_pi16(a, b);
}
__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi32
- // CHECK: call x86_mmx @llvm.x86.ssse3.psign.d
+ // CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
return _mm_sign_pi32(a, b);
}
__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psll.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(
return _mm_sll_pi16(a, b);
}
__m64 test_mm_sll_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.psll.d
+ // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(
return _mm_sll_pi32(a, b);
}
__m64 test_mm_sll_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.psll.q
+ // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(
return _mm_sll_si64(a, b);
}
__m64 test_mm_slli_pi16(__m64 a) {
// CHECK-LABEL: test_mm_slli_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.pslli.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(
return _mm_slli_pi16(a, 3);
}
__m64 test_mm_slli_pi32(__m64 a) {
// CHECK-LABEL: test_mm_slli_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.pslli.d
+ // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(
return _mm_slli_pi32(a, 3);
}
__m64 test_mm_slli_si64(__m64 a) {
// CHECK-LABEL: test_mm_slli_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.pslli.q
+ // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(
return _mm_slli_si64(a, 3);
}
__m64 test_mm_sra_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sra_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psra.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(
return _mm_sra_pi16(a, b);
}
__m64 test_mm_sra_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sra_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.psra.d
+ // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(
return _mm_sra_pi32(a, b);
}
__m64 test_mm_srai_pi16(__m64 a) {
// CHECK-LABEL: test_mm_srai_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psrai.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(
return _mm_srai_pi16(a, 3);
}
__m64 test_mm_srai_pi32(__m64 a) {
// CHECK-LABEL: test_mm_srai_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.psrai.d
+ // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(
return _mm_srai_pi32(a, 3);
}
__m64 test_mm_srl_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psrl.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(
return _mm_srl_pi16(a, b);
}
__m64 test_mm_srl_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.psrl.d
+ // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(
return _mm_srl_pi32(a, b);
}
__m64 test_mm_srl_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.psrl.q
+ // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(
return _mm_srl_si64(a, b);
}
__m64 test_mm_srli_pi16(__m64 a) {
// CHECK-LABEL: test_mm_srli_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psrli.w
+ // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(
return _mm_srli_pi16(a, 3);
}
__m64 test_mm_srli_pi32(__m64 a) {
// CHECK-LABEL: test_mm_srli_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.psrli.d
+ // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(
return _mm_srli_pi32(a, 3);
}
__m64 test_mm_srli_si64(__m64 a) {
// CHECK-LABEL: test_mm_srli_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.psrli.q
+ // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(
return _mm_srli_si64(a, 3);
}
void test_mm_stream_pi(__m64 *p, __m64 a) {
// CHECK-LABEL: test_mm_stream_pi
- // CHECK: call void @llvm.x86.mmx.movnt.dq
+ // CHECK: store <1 x i64> {{%.*}}, ptr {{%.*}}, align 8, !nontemporal
_mm_stream_pi(p, a);
}
void test_mm_stream_pi_void(void *p, __m64 a) {
// CHECK-LABEL: test_mm_stream_pi_void
- // CHECK: call void @llvm.x86.mmx.movnt.dq
+ // CHECK: store <1 x i64> {{%.*}}, ptr {{%.*}}, align 8, !nontemporal
_mm_stream_pi(p, a);
}
__m64 test_mm_sub_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.psub.b
+ // CHECK: sub <8 x i8> {{%.*}}, {{%.*}}
return _mm_sub_pi8(a, b);
}
__m64 test_mm_sub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psub.w
+ // CHECK: sub <4 x i16> {{%.*}}, {{%.*}}
return _mm_sub_pi16(a, b);
}
__m64 test_mm_sub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.psub.d
+ // CHECK: sub <2 x i32> {{%.*}}, {{%.*}}
return _mm_sub_pi32(a, b);
}
__m64 test_mm_sub_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+ // CHECK: sub i64 {{%.*}}, {{%.*}}
return _mm_sub_si64(a, b);
}
__m64 test_mm_subs_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.psubs.b
+ // CHECK: call <8 x i8> @llvm.ssub.sat.v8i8(
return _mm_subs_pi8(a, b);
}
__m64 test_mm_subs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.psubs.w
+ // CHECK: call <4 x i16> @llvm.ssub.sat.v4i16(
return _mm_subs_pi16(a, b);
}
__m64 test_mm_subs_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pu8
- // CHECK: call x86_mmx @llvm.x86.mmx.psubus.b
+ // CHECK: call <8 x i8> @llvm.usub.sat.v8i8(
return _mm_subs_pu8(a, b);
}
__m64 test_mm_subs_pu16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pu16
- // CHECK: call x86_mmx @llvm.x86.mmx.psubus.w
+ // CHECK: call <4 x i16> @llvm.usub.sat.v4i16(
return _mm_subs_pu16(a, b);
}
@@ -668,42 +677,42 @@ long long test_m_to_int64(__m64 a) {
__m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpackhi_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.punpckhbw
+ // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32>
return _mm_unpackhi_pi8(a, b);
}
__m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpackhi_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.punpckhwd
+ // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32>
return _mm_unpackhi_pi16(a, b);
}
__m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpackhi_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.punpckhdq
+ // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32>
return _mm_unpackhi_pi32(a, b);
}
__m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpacklo_pi8
- // CHECK: call x86_mmx @llvm.x86.mmx.punpcklbw
+ // CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32>
return _mm_unpacklo_pi8(a, b);
}
__m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpacklo_pi16
- // CHECK: call x86_mmx @llvm.x86.mmx.punpcklwd
+ // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32>
return _mm_unpacklo_pi16(a, b);
}
__m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpacklo_pi32
- // CHECK: call x86_mmx @llvm.x86.mmx.punpckldq
+ // CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32>
return _mm_unpacklo_pi32(a, b);
}
__m64 test_mm_xor_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_xor_si64
- // CHECK: call x86_mmx @llvm.x86.mmx.pxor
+ // CHECK: xor <1 x i64> {{%.*}}, {{%.*}}
return _mm_xor_si64(a, b);
}
diff --git a/clang/test/CodeGen/X86/mmx-inline-asm.c b/clang/test/CodeGen/X86/mmx-inline-asm.c
index a0702c7f780d..17fce1a48755 100644
--- a/clang/test/CodeGen/X86/mmx-inline-asm.c
+++ b/clang/test/CodeGen/X86/mmx-inline-asm.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx -target-feature +sse2 %s -o - | FileCheck %s
#include <mmintrin.h>
// CHECK: { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
diff --git a/clang/test/CodeGen/X86/mmx-shift-with-immediate.c b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c
index 83be6b5517c0..741cb9c9c5ec 100644
--- a/clang/test/CodeGen/X86/mmx-shift-with-immediate.c
+++ b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c
@@ -1,23 +1,23 @@
-// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +sse2 %s -o - | FileCheck %s
#include <mmintrin.h>
void shift(__m64 a, __m64 b, int c) {
- // CHECK: x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 {{.*}})
_mm_slli_pi16(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 {{.*}})
_mm_slli_pi32(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 {{.*}})
_mm_slli_si64(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 {{.*}})
_mm_srli_pi16(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 {{.*}})
_mm_srli_pi32(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 {{.*}})
_mm_srli_si64(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 {{.*}})
_mm_srai_pi16(a, c);
- // CHECK: x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %{{.*}}, i32 {{.*}})
+ // CHECK: <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 {{.*}})
_mm_srai_pi32(a, c);
}
diff --git a/clang/test/CodeGen/attr-target-x86-mmx.c b/clang/test/CodeGen/attr-target-x86-mmx.c
index 01663766d984..39b26619475a 100644
--- a/clang/test/CodeGen/attr-target-x86-mmx.c
+++ b/clang/test/CodeGen/attr-target-x86-mmx.c
@@ -1,12 +1,11 @@
// RUN: %clang_cc1 -triple i386-linux-gnu -emit-llvm %s -o - | FileCheck %s
-// Picking a cpu that doesn't have mmx or sse by default so we can enable it later.
+// Picking a cpu that doesn't have sse by default so we can enable it later.
#define __MM_MALLOC_H
#include
-// Verify that when we turn on sse that we also turn on mmx.
-void __attribute__((target("sse"))) shift(__m64 a, __m64 b, int c) {
+void __attribute__((target("sse2"))) shift(__m64 a, __m64 b, int c) {
_mm_slli_pi16(a, c);
_mm_slli_pi32(a, c);
_mm_slli_si64(a, c);
@@ -19,4 +18,4 @@ void __attribute__((target("sse"))) shift(__m64 a, __m64 b, int c) {
_mm_srai_pi32(a, c);
}
-// CHECK: "target-features"="+cx8,+mmx,+sse,+x87"
+// CHECK: "target-features"="+cx8,+mmx,+sse,+sse2,+x87"
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index de31a4db5b0c..c42c3216ec53 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -168,26 +168,6 @@ void f0(void) {
tmp_V4f = __builtin_ia32_minss(tmp_V4f, tmp_V4f);
tmp_V4f = __builtin_ia32_maxss(tmp_V4f, tmp_V4f);
- tmp_V8c = __builtin_ia32_paddsb(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_paddsw(tmp_V4s, tmp_V4s);
- tmp_V8c = __builtin_ia32_psubsb(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_psubsw(tmp_V4s, tmp_V4s);
- tmp_V8c = __builtin_ia32_paddusb(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_paddusw(tmp_V4s, tmp_V4s);
- tmp_V8c = __builtin_ia32_psubusb(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_psubusw(tmp_V4s, tmp_V4s);
- tmp_V4s = __builtin_ia32_pmulhw(tmp_V4s, tmp_V4s);
- tmp_V4s = __builtin_ia32_pmulhuw(tmp_V4s, tmp_V4s);
- tmp_V8c = __builtin_ia32_pcmpeqb(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_pcmpeqw(tmp_V4s, tmp_V4s);
- tmp_V2i = __builtin_ia32_pcmpeqd(tmp_V2i, tmp_V2i);
- tmp_V8c = __builtin_ia32_pcmpgtb(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_pcmpgtw(tmp_V4s, tmp_V4s);
- tmp_V2i = __builtin_ia32_pcmpgtd(tmp_V2i, tmp_V2i);
- tmp_V8c = __builtin_ia32_pmaxub(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_pmaxsw(tmp_V4s, tmp_V4s);
- tmp_V8c = __builtin_ia32_pminub(tmp_V8c, tmp_V8c);
- tmp_V4s = __builtin_ia32_pminsw(tmp_V4s, tmp_V4s);
tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 0);
tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 1);
tmp_V2d = __builtin_ia32_cmppd(tmp_V2d, tmp_V2d, 2);
@@ -220,45 +200,17 @@ void f0(void) {
tmp_V4f = __builtin_ia32_hsubps(tmp_V4f, tmp_V4f);
tmp_V2d = __builtin_ia32_hsubpd(tmp_V2d, tmp_V2d);
tmp_V8s = __builtin_ia32_phaddw128(tmp_V8s, tmp_V8s);
- tmp_V4s = __builtin_ia32_phaddw(tmp_V4s, tmp_V4s);
tmp_V4i = __builtin_ia32_phaddd128(tmp_V4i, tmp_V4i);
- tmp_V2i = __builtin_ia32_phaddd(tmp_V2i, tmp_V2i);
tmp_V8s = __builtin_ia32_phaddsw128(tmp_V8s, tmp_V8s);
- tmp_V4s = __builtin_ia32_phaddsw(tmp_V4s, tmp_V4s);
tmp_V8s = __builtin_ia32_phsubw128(tmp_V8s, tmp_V8s);
- tmp_V4s = __builtin_ia32_phsubw(tmp_V4s, tmp_V4s);
tmp_V4i = __builtin_ia32_phsubd128(tmp_V4i, tmp_V4i);
- tmp_V2i = __builtin_ia32_phsubd(tmp_V2i, tmp_V2i);
tmp_V8s = __builtin_ia32_phsubsw128(tmp_V8s, tmp_V8s);
- tmp_V4s = __builtin_ia32_phsubsw(tmp_V4s, tmp_V4s);
tmp_V8s = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
- tmp_V8c = __builtin_ia32_pmaddubsw(tmp_V8c, tmp_V8c);
tmp_V8s = __builtin_ia32_pmulhrsw128(tmp_V8s, tmp_V8s);
- tmp_V4s = __builtin_ia32_pmulhrsw(tmp_V4s, tmp_V4s);
tmp_V16c = __builtin_ia32_pshufb128(tmp_V16c, tmp_V16c);
- tmp_V8c = __builtin_ia32_pshufb(tmp_V8c, tmp_V8c);
tmp_V16c = __builtin_ia32_psignb128(tmp_V16c, tmp_V16c);
- tmp_V8c = __builtin_ia32_psignb(tmp_V8c, tmp_V8c);
tmp_V8s = __builtin_ia32_psignw128(tmp_V8s, tmp_V8s);
- tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s);
tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i);
- tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i);
- tmp_V8c = __builtin_ia32_pabsb(tmp_V8c);
- tmp_V4s = __builtin_ia32_pabsw(tmp_V4s);
- tmp_V2i = __builtin_ia32_pabsd(tmp_V2i);
- tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi);
- tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);
- tmp_V1LLi = __builtin_ia32_psllq(tmp_V1LLi, tmp_V1LLi);
- tmp_V4s = __builtin_ia32_psrlw(tmp_V4s, tmp_V1LLi);
- tmp_V2i = __builtin_ia32_psrld(tmp_V2i, tmp_V1LLi);
- tmp_V1LLi = __builtin_ia32_psrlq(tmp_V1LLi, tmp_V1LLi);
- tmp_V4s = __builtin_ia32_psraw(tmp_V4s, tmp_V1LLi);
- tmp_V2i = __builtin_ia32_psrad(tmp_V2i, tmp_V1LLi);
- tmp_V2i = __builtin_ia32_pmaddwd(tmp_V4s, tmp_V4s);
- tmp_V8c = __builtin_ia32_packsswb(tmp_V4s, tmp_V4s);
- tmp_V4s = __builtin_ia32_packssdw(tmp_V2i, tmp_V2i);
- tmp_V8c = __builtin_ia32_packuswb(tmp_V4s, tmp_V4s);
- tmp_i = __builtin_ia32_vec_ext_v2si(tmp_V2i, 0);
__builtin_ia32_incsspd(tmp_Ui);
__builtin_ia32_incsspq(tmp_ULLi);
@@ -306,8 +258,6 @@ void f0(void) {
(void) __builtin_ia32_clzero(tmp_vp);
(void) __builtin_ia32_cldemote(tmp_vp);
- tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
- tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
tmp_i = __builtin_ia32_cvtss2si(tmp_V4f);
tmp_i = __builtin_ia32_cvttss2si(tmp_V4f);
@@ -320,17 +270,12 @@ void f0(void) {
tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f);
tmp_LLi = __builtin_ia32_cvttss2si64(tmp_V4f);
#endif
- tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
- (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
tmp_i = __builtin_ia32_movmskps(tmp_V4f);
- tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
- (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
(void) __builtin_ia32_sfence();
#ifndef OPENCL
(void) _mm_sfence();
#endif
- tmp_V4s = __builtin_ia32_psadbw(tmp_V8c, tmp_V8c);
tmp_V4f = __builtin_ia32_rcpps(tmp_V4f);
tmp_V4f = __builtin_ia32_rcpss(tmp_V4f);
tmp_V4f = __builtin_ia32_rsqrtps(tmp_V4f);
@@ -348,11 +293,8 @@ void f0(void) {
tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d);
- tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d);
tmp_V4f = __builtin_ia32_cvtpd2ps(tmp_V2d);
tmp_V4i = __builtin_ia32_cvttpd2dq(tmp_V2d);
- tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d);
- tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i);
tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d);
tmp_i = __builtin_ia32_cvttsd2si(tmp_V2d);
tmp_V4f = __builtin_ia32_cvtsd2ss(tmp_V4f, tmp_V2d);
@@ -379,26 +321,9 @@ void f0(void) {
(void) _mm_pause();
#endif
- tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, imm_i_0_8);
- tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, imm_i_0_8);
- tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, imm_i_0_8);
- tmp_V4s = __builtin_ia32_psrawi(tmp_V4s, imm_i_0_8);
- tmp_V2i = __builtin_ia32_psradi(tmp_V2i, imm_i_0_8);
- tmp_V4s = __builtin_ia32_psrlwi(tmp_V4s, imm_i_0_8);
- tmp_V2i = __builtin_ia32_psrldi(tmp_V2i, imm_i_0_8);
- tmp_V1LLi = __builtin_ia32_psrlqi(tmp_V1LLi, imm_i_0_8);
// Using non-immediate argument supported for gcc compatibility
- tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);
- tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);
- tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, tmp_i);
- tmp_V4s = __builtin_ia32_psrawi(tmp_V4s, tmp_i);
- tmp_V2i = __builtin_ia32_psradi(tmp_V2i, tmp_i);
- tmp_V4s = __builtin_ia32_psrlwi(tmp_V4s, tmp_i);
- tmp_V2i = __builtin_ia32_psrldi(tmp_V2i, tmp_i);
- tmp_V1LLi = __builtin_ia32_psrlqi(tmp_V1LLi, tmp_i);
- tmp_V1LLi = __builtin_ia32_pmuludq(tmp_V2i, tmp_V2i);
tmp_V2LLi = __builtin_ia32_pmuludq128(tmp_V4i, tmp_V4i);
tmp_V8s = __builtin_ia32_psraw128(tmp_V8s, tmp_V8s);
tmp_V4i = __builtin_ia32_psrad128(tmp_V4i, tmp_V4i);
@@ -433,7 +358,6 @@ void f0(void) {
(void) __builtin_ia32_mwait(tmp_Ui, tmp_Ui);
tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
tmp_V16c = __builtin_ia32_palignr128(tmp_V16c, tmp_V16c, imm_i);
- tmp_V8c = __builtin_ia32_palignr(tmp_V8c, tmp_V8c, imm_i);
#ifdef USE_SSE4
tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
diff --git a/clang/test/CodeGen/palignr.c b/clang/test/CodeGen/palignr.c
index 5a77597c3403..092937ac115d 100644
--- a/clang/test/CodeGen/palignr.c
+++ b/clang/test/CodeGen/palignr.c
@@ -14,18 +14,3 @@ int4 align2(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 16); }
int4 align3(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 17); }
// CHECK: xor
int4 align4(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 32); }
-
-#define _mm_alignr_pi8(a, b, n) (__builtin_ia32_palignr((a), (b), (n)))
-typedef __attribute__((vector_size(8))) int int2;
-
-// CHECK: palignr
-int2 align5(int2 a, int2 b) { return _mm_alignr_pi8(a, b, 8); }
-
-// CHECK: palignr
-int2 align6(int2 a, int2 b) { return _mm_alignr_pi8(a, b, 9); }
-
-// CHECK: palignr
-int2 align7(int2 a, int2 b) { return _mm_alignr_pi8(a, b, 16); }
-
-// CHECK: palignr
-int2 align8(int2 a, int2 b) { return _mm_alignr_pi8(a, b, 7); }
diff --git a/clang/test/CodeGen/pr26099.c b/clang/test/CodeGen/pr26099.c
deleted file mode 100644
index 15b73b832e9d..000000000000
--- a/clang/test/CodeGen/pr26099.c
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +mmx -emit-llvm -o - -Wall -Werror
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +mmx -emit-llvm -o - -Wall -Werror
-// REQUIRES: asserts
-
-#include <mmintrin.h>
-
-int __attribute__ ((__vector_size__ (8))) b;
-
-void bar(int a)
-{
- b = __builtin_ia32_vec_init_v2si (0, a);
-}
\ No newline at end of file
diff --git a/clang/test/Headers/xmmintrin.c b/clang/test/Headers/xmmintrin.c
index a75b3380368c..15e4a431df65 100644
--- a/clang/test/Headers/xmmintrin.c
+++ b/clang/test/Headers/xmmintrin.c
@@ -14,7 +14,7 @@ _MM_ALIGN16 char c;
// checking that clang emits PACKSSDW instead of PACKSSWB.
// CHECK: define{{.*}} i64 @test_mm_cvtps_pi16
-// CHECK: call x86_mmx @llvm.x86.mmx.packssdw
+// CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128
__m64 test_mm_cvtps_pi16(__m128 a) {
return _mm_cvtps_pi16(a);
diff --git a/clang/test/Sema/x86-builtin-palignr.c b/clang/test/Sema/x86-builtin-palignr.c
index e055cbb70e9e..33a963c15b00 100644
--- a/clang/test/Sema/x86-builtin-palignr.c
+++ b/clang/test/Sema/x86-builtin-palignr.c
@@ -4,5 +4,5 @@
#include <tmmintrin.h>
__m64 test1(__m64 a, __m64 b, int c) {
- return _mm_alignr_pi8(a, b, c); // expected-error {{argument to '__builtin_ia32_palignr' must be a constant integer}}
+ return _mm_alignr_pi8(a, b, c); // expected-error {{argument to '__builtin_ia32_psrldqi128_byteshift' must be a constant integer}}
}
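For reference, a minimal sketch of what this Sema test still requires from callers (assuming <tmmintrin.h>; only the builtin named in the diagnostic changed, not the constant-integer requirement; the function name is illustrative):

#include <tmmintrin.h>

/* The byte-shift count passed to _mm_alignr_pi8 must be an integer
   constant expression; a literal is accepted, a plain int parameter is not. */
__m64 test_ok(__m64 a, __m64 b) {
  return _mm_alignr_pi8(a, b, 2);
}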
diff --git a/clang/www/builtins.py b/clang/www/builtins.py
index 0c2e181a8cfa..849e6bd4a816 100755
--- a/clang/www/builtins.py
+++ b/clang/www/builtins.py
@@ -4,8 +4,9 @@ import sys, fileinput
err = 0
-# Giant associative set of builtin->intrinsic mappings where clang doesn't
-# implement the builtin since the vector operation works by default.
+# Giant associative set of builtin->intrinsic mappings where clang
+# doesn't implement the builtin. (Either because the vector operation
+# works without a builtin, or for other reasons.)
repl_map = {
"__builtin_ia32_addps": "_mm_add_ps",
@@ -134,6 +135,99 @@ repl_map = {
"__builtin_ia32_vec_ext_v2di": "_mm_extract_epi64",
"__builtin_ia32_vec_ext_v4hi": "_mm_extract_pi16",
"__builtin_ia32_vec_ext_v4sf": "_mm_extract_ps",
+ # Removed MMX builtins
+ "__builtin_ia32_paddb": "_mm_add_pi8",
+ "__builtin_ia32_paddw": "_mm_add_pi16",
+ "__builtin_ia32_paddd": "_mm_add_pi32",
+ "__builtin_ia32_paddsb": "_mm_adds_pi8",
+ "__builtin_ia32_paddsw": "_mm_adds_pi16",
+ "__builtin_ia32_paddusb": "_mm_adds_pu8",
+ "__builtin_ia32_paddusw": "_mm_adds_pu16",
+ "__builtin_ia32_psubb": "_mm_sub_pi8",
+ "__builtin_ia32_psubw": "_mm_sub_pi16",
+ "__builtin_ia32_psubd": "_mm_sub_pi32",
+ "__builtin_ia32_psubsb": "_mm_subs_pi8",
+ "__builtin_ia32_psubsw": "_mm_subs_pi16",
+ "__builtin_ia32_psubusb": "_mm_subs_pu8",
+ "__builtin_ia32_psubusw": "_mm_subs_pu16",
+ "__builtin_ia32_pmulhw": "_mm_mulhi_pi16",
+ "__builtin_ia32_pmullw": "_mm_mullo_pi16",
+ "__builtin_ia32_pmaddwd": "_mm_madd_pi16",
+ "__builtin_ia32_pand": "_mm_and_si64",
+ "__builtin_ia32_pandn": "_mm_andnot_si64",
+ "__builtin_ia32_por": "_mm_or_si64",
+ "__builtin_ia32_pxor": "_mm_xor_si64",
+ "__builtin_ia32_psllw": "_mm_sll_pi16",
+ "__builtin_ia32_pslld": "_mm_sll_pi32",
+ "__builtin_ia32_psllq": "_mm_sll_si64",
+ "__builtin_ia32_psrlw": "_mm_srl_pi16",
+ "__builtin_ia32_psrld": "_mm_srl_pi32",
+ "__builtin_ia32_psrlq": "_mm_srl_si64",
+ "__builtin_ia32_psraw": "_mm_sra_pi16",
+ "__builtin_ia32_psrad": "_mm_sra_pi32",
+ "__builtin_ia32_psllwi": "_mm_slli_pi16",
+ "__builtin_ia32_pslldi": "_mm_slli_pi32",
+ "__builtin_ia32_psllqi": "_mm_slli_si64",
+ "__builtin_ia32_psrlwi": "_mm_srli_pi16",
+ "__builtin_ia32_psrldi": "_mm_srli_pi32",
+ "__builtin_ia32_psrlqi": "_mm_srli_si64",
+ "__builtin_ia32_psrawi": "_mm_srai_pi16",
+ "__builtin_ia32_psradi": "_mm_srai_pi32",
+ "__builtin_ia32_packsswb": "_mm_packs_pi16",
+ "__builtin_ia32_packssdw": "_mm_packs_pi32",
+ "__builtin_ia32_packuswb": "_mm_packs_pu16",
+ "__builtin_ia32_punpckhbw": "_mm_unpackhi_pi8",
+ "__builtin_ia32_punpckhwd": "_mm_unpackhi_pi16",
+ "__builtin_ia32_punpckhdq": "_mm_unpackhi_pi32",
+ "__builtin_ia32_punpcklbw": "_mm_unpacklo_pi8",
+ "__builtin_ia32_punpcklwd": "_mm_unpacklo_pi16",
+ "__builtin_ia32_punpckldq": "_mm_unpacklo_pi32",
+ "__builtin_ia32_pcmpeqb": "_mm_cmpeq_pi8",
+ "__builtin_ia32_pcmpeqw": "_mm_cmpeq_pi16",
+ "__builtin_ia32_pcmpeqd": "_mm_cmpeq_pi32",
+ "__builtin_ia32_pcmpgtb": "_mm_cmpgt_pi8",
+ "__builtin_ia32_pcmpgtw": "_mm_cmpgt_pi16",
+ "__builtin_ia32_pcmpgtd": "_mm_cmpgt_pi32",
+ "__builtin_ia32_maskmovq": "_mm_maskmove_si64",
+ "__builtin_ia32_movntq": "_mm_stream_pi",
+ "__builtin_ia32_vec_init_v2si": "_mm_setr_pi32",
+ "__builtin_ia32_vec_init_v4hi": "_mm_setr_pi16",
+ "__builtin_ia32_vec_init_v8qi": "_mm_setr_pi8",
+ "__builtin_ia32_cvtpi2ps": "_mm_cvtpi32_ps",
+ "__builtin_ia32_cvtps2pi": "_mm_cvtps_pi32",
+ "__builtin_ia32_cvttps2pi": "_mm_cvttps_pi32",
+ "__builtin_ia32_pavgb": "_mm_avg_pu8",
+ "__builtin_ia32_pavgw": "_mm_avg_pu16",
+ "__builtin_ia32_pmaxsw": "_mm_max_pi16",
+ "__builtin_ia32_pmaxub": "_mm_max_pu8",
+ "__builtin_ia32_pminsw": "_mm_min_pi16",
+ "__builtin_ia32_pminub": "_mm_min_pu8",
+ "__builtin_ia32_pmovmskb": "_mm_movemask_pi8",
+ "__builtin_ia32_pmulhuw": "_mm_mulhi_pu16",
+ "__builtin_ia32_psadbw": "_mm_sad_pu8",
+ "__builtin_ia32_pshufw": "_mm_shuffle_pi16",
+ "__builtin_ia32_cvtpd2pi": "_mm_cvtpd_pi32",
+ "__builtin_ia32_cvtpi2pd": "_mm_cvtpi32_pd",
+ "__builtin_ia32_cvttpd2pi": "_mm_cvttpd_pi32",
+ "__builtin_ia32_paddq": "_mm_add_si64",
+ "__builtin_ia32_pmuludq": "_mm_mul_su32",
+ "__builtin_ia32_psubq": "_mm_sub_si64",
+ "__builtin_ia32_pabsb": "_mm_abs_pi8",
+ "__builtin_ia32_pabsd": "_mm_abs_pi32",
+ "__builtin_ia32_pabsw": "_mm_abs_pi16",
+ "__builtin_ia32_palignr": "_mm_alignr_pi8",
+ "__builtin_ia32_phaddd": "_mm_hadd_pi32",
+ "__builtin_ia32_phaddsw": "_mm_hadds_pi16",
+ "__builtin_ia32_phaddw": "_mm_hadd_pi16",
+ "__builtin_ia32_phsubd": "_mm_hsub_pi32",
+ "__builtin_ia32_phsubsw": "_mm_hsubs_pi16",
+ "__builtin_ia32_phsubw": "_mm_hsub_pi16",
+ "__builtin_ia32_pmaddubsw": "_mm_maddubs_pi16",
+ "__builtin_ia32_pmulhrsw": "_mm_mulhrs_pi16",
+ "__builtin_ia32_pshufb": "_mm_shuffle_pi8",
+ "__builtin_ia32_psignw": "_mm_sign_pi16",
+ "__builtin_ia32_psignb": "_mm_sign_pi8",
+ "__builtin_ia32_psignd": "_mm_sign_pi32",
}
# Special unhandled cases:
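For reference, a minimal sketch of how a repl_map entry translates into a source change (a hedged illustration, assuming <mmintrin.h> and an SSE2-capable target; the helper name is hypothetical):

#include <mmintrin.h>

/* Former direct builtin call, rewritten via the *mmintrin.h intrinsic:
   __builtin_ia32_paddb -> _mm_add_pi8, per repl_map above. */
__m64 add_bytes(__m64 a, __m64 b) {
  return _mm_add_pi8(a, b);
}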
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index adc46f9789eb..b6a92136f382 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -222,11 +222,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_cvttss2si64 : ClangBuiltin<"__builtin_ia32_cvttss2si64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtps2pi : ClangBuiltin<"__builtin_ia32_cvtps2pi">,
+ def int_x86_sse_cvtps2pi :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttps2pi: ClangBuiltin<"__builtin_ia32_cvttps2pi">,
+ def int_x86_sse_cvttps2pi:
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpi2ps : ClangBuiltin<"__builtin_ia32_cvtpi2ps">,
+ def int_x86_sse_cvtpi2ps :
DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
@@ -426,11 +426,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_cvtsd2ss : ClangBuiltin<"__builtin_ia32_cvtsd2ss">,
DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpd2pi : ClangBuiltin<"__builtin_ia32_cvtpd2pi">,
+ def int_x86_sse_cvtpd2pi :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvttpd2pi: ClangBuiltin<"__builtin_ia32_cvttpd2pi">,
+ def int_x86_sse_cvttpd2pi:
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpi2pd : ClangBuiltin<"__builtin_ia32_cvtpi2pd">,
+ def int_x86_sse_cvtpi2pd :
DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
@@ -512,49 +512,49 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Horizontal arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_phadd_w : ClangBuiltin<"__builtin_ia32_phaddw">,
+ def int_x86_ssse3_phadd_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_w_128 : ClangBuiltin<"__builtin_ia32_phaddw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_d : ClangBuiltin<"__builtin_ia32_phaddd">,
+ def int_x86_ssse3_phadd_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_d_128 : ClangBuiltin<"__builtin_ia32_phaddd128">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_sw : ClangBuiltin<"__builtin_ia32_phaddsw">,
+ def int_x86_ssse3_phadd_sw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_sw_128 : ClangBuiltin<"__builtin_ia32_phaddsw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_w : ClangBuiltin<"__builtin_ia32_phsubw">,
+ def int_x86_ssse3_phsub_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_w_128 : ClangBuiltin<"__builtin_ia32_phsubw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_d : ClangBuiltin<"__builtin_ia32_phsubd">,
+ def int_x86_ssse3_phsub_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_d_128 : ClangBuiltin<"__builtin_ia32_phsubd128">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_sw : ClangBuiltin<"__builtin_ia32_phsubsw">,
+ def int_x86_ssse3_phsub_sw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_sw_128 : ClangBuiltin<"__builtin_ia32_phsubsw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_pmadd_ub_sw : ClangBuiltin<"__builtin_ia32_pmaddubsw">,
+ def int_x86_ssse3_pmadd_ub_sw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pmadd_ub_sw_128 : ClangBuiltin<"__builtin_ia32_pmaddubsw128">,
@@ -564,7 +564,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Packed multiply high with round and scale
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pmul_hr_sw : ClangBuiltin<"__builtin_ia32_pmulhrsw">,
+ def int_x86_ssse3_pmul_hr_sw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_ssse3_pmul_hr_sw_128 : ClangBuiltin<"__builtin_ia32_pmulhrsw128">,
@@ -574,34 +574,34 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Shuffle ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pshuf_b : ClangBuiltin<"__builtin_ia32_pshufb">,
+ def int_x86_ssse3_pshuf_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pshuf_b_128 : ClangBuiltin<"__builtin_ia32_pshufb128">,
DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse_pshuf_w : ClangBuiltin<"__builtin_ia32_pshufw">,
+ def int_x86_sse_pshuf_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
// Sign ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_psign_b : ClangBuiltin<"__builtin_ia32_psignb">,
+ def int_x86_ssse3_psign_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_b_128 : ClangBuiltin<"__builtin_ia32_psignb128">,
DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_w : ClangBuiltin<"__builtin_ia32_psignw">,
+ def int_x86_ssse3_psign_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_w_128 : ClangBuiltin<"__builtin_ia32_psignw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_d : ClangBuiltin<"__builtin_ia32_psignd">,
+ def int_x86_ssse3_psign_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_psign_d_128 : ClangBuiltin<"__builtin_ia32_psignd128">,
@@ -611,13 +611,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Absolute value ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pabs_b : ClangBuiltin<"__builtin_ia32_pabsb">,
+ def int_x86_ssse3_pabs_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_w : ClangBuiltin<"__builtin_ia32_pabsw">,
+ def int_x86_ssse3_pabs_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_d : ClangBuiltin<"__builtin_ia32_pabsd">,
+ def int_x86_ssse3_pabs_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
@@ -2260,118 +2260,118 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Addition
- def int_x86_mmx_padd_b : ClangBuiltin<"__builtin_ia32_paddb">,
+ def int_x86_mmx_padd_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padd_w : ClangBuiltin<"__builtin_ia32_paddw">,
+ def int_x86_mmx_padd_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padd_d : ClangBuiltin<"__builtin_ia32_paddd">,
+ def int_x86_mmx_padd_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padd_q : ClangBuiltin<"__builtin_ia32_paddq">,
+ def int_x86_mmx_padd_q :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padds_b : ClangBuiltin<"__builtin_ia32_paddsb">,
+ def int_x86_mmx_padds_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padds_w : ClangBuiltin<"__builtin_ia32_paddsw">,
+ def int_x86_mmx_padds_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_paddus_b : ClangBuiltin<"__builtin_ia32_paddusb">,
+ def int_x86_mmx_paddus_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_paddus_w : ClangBuiltin<"__builtin_ia32_paddusw">,
+ def int_x86_mmx_paddus_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Subtraction
- def int_x86_mmx_psub_b : ClangBuiltin<"__builtin_ia32_psubb">,
+ def int_x86_mmx_psub_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psub_w : ClangBuiltin<"__builtin_ia32_psubw">,
+ def int_x86_mmx_psub_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psub_d : ClangBuiltin<"__builtin_ia32_psubd">,
+ def int_x86_mmx_psub_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psub_q : ClangBuiltin<"__builtin_ia32_psubq">,
+ def int_x86_mmx_psub_q :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psubs_b : ClangBuiltin<"__builtin_ia32_psubsb">,
+ def int_x86_mmx_psubs_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psubs_w : ClangBuiltin<"__builtin_ia32_psubsw">,
+ def int_x86_mmx_psubs_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psubus_b : ClangBuiltin<"__builtin_ia32_psubusb">,
+ def int_x86_mmx_psubus_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psubus_w : ClangBuiltin<"__builtin_ia32_psubusw">,
+ def int_x86_mmx_psubus_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
// Multiplication
- def int_x86_mmx_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw">,
+ def int_x86_mmx_pmulh_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pmull_w : ClangBuiltin<"__builtin_ia32_pmullw">,
+ def int_x86_mmx_pmull_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw">,
+ def int_x86_mmx_pmulhu_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pmulu_dq : ClangBuiltin<"__builtin_ia32_pmuludq">,
+ def int_x86_mmx_pmulu_dq :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd">,
+ def int_x86_mmx_pmadd_wd :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Bitwise operations
- def int_x86_mmx_pand : ClangBuiltin<"__builtin_ia32_pand">,
+ def int_x86_mmx_pand :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pandn : ClangBuiltin<"__builtin_ia32_pandn">,
+ def int_x86_mmx_pandn :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_por : ClangBuiltin<"__builtin_ia32_por">,
+ def int_x86_mmx_por :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pxor : ClangBuiltin<"__builtin_ia32_pxor">,
+ def int_x86_mmx_pxor :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Averages
- def int_x86_mmx_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb">,
+ def int_x86_mmx_pavg_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw">,
+ def int_x86_mmx_pavg_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Maximum
- def int_x86_mmx_pmaxu_b : ClangBuiltin<"__builtin_ia32_pmaxub">,
+ def int_x86_mmx_pmaxu_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pmaxs_w : ClangBuiltin<"__builtin_ia32_pmaxsw">,
+ def int_x86_mmx_pmaxs_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Minimum
- def int_x86_mmx_pminu_b : ClangBuiltin<"__builtin_ia32_pminub">,
+ def int_x86_mmx_pminu_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pmins_w : ClangBuiltin<"__builtin_ia32_pminsw">,
+ def int_x86_mmx_pmins_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Packed sum of absolute differences
- def int_x86_mmx_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw">,
+ def int_x86_mmx_psad_bw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
}
@@ -2379,58 +2379,58 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Shift left logical
- def int_x86_mmx_psll_w : ClangBuiltin<"__builtin_ia32_psllw">,
+ def int_x86_mmx_psll_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psll_d : ClangBuiltin<"__builtin_ia32_pslld">,
+ def int_x86_mmx_psll_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psll_q : ClangBuiltin<"__builtin_ia32_psllq">,
+ def int_x86_mmx_psll_q :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw">,
+ def int_x86_mmx_psrl_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrl_d : ClangBuiltin<"__builtin_ia32_psrld">,
+ def int_x86_mmx_psrl_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq">,
+ def int_x86_mmx_psrl_q :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psra_w : ClangBuiltin<"__builtin_ia32_psraw">,
+ def int_x86_mmx_psra_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psra_d : ClangBuiltin<"__builtin_ia32_psrad">,
+ def int_x86_mmx_psra_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
// Oddly these don't require an immediate due to a gcc compatibility issue.
- def int_x86_mmx_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi">,
+ def int_x86_mmx_pslli_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi">,
+ def int_x86_mmx_pslli_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi">,
+ def int_x86_mmx_pslli_q :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi">,
+ def int_x86_mmx_psrli_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi">,
+ def int_x86_mmx_psrli_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi">,
+ def int_x86_mmx_psrli_q :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi">,
+ def int_x86_mmx_psrai_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_mmx_psrai_d : ClangBuiltin<"__builtin_ia32_psradi">,
+ def int_x86_mmx_psrai_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem]>;
}
@@ -2475,83 +2475,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_packsswb : ClangBuiltin<"__builtin_ia32_packsswb">,
+ def int_x86_mmx_packsswb :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_packssdw : ClangBuiltin<"__builtin_ia32_packssdw">,
+ def int_x86_mmx_packssdw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_packuswb : ClangBuiltin<"__builtin_ia32_packuswb">,
+ def int_x86_mmx_packuswb :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
// Unpacking ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_punpckhbw : ClangBuiltin<"__builtin_ia32_punpckhbw">,
+ def int_x86_mmx_punpckhbw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpckhwd : ClangBuiltin<"__builtin_ia32_punpckhwd">,
+ def int_x86_mmx_punpckhwd :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpckhdq : ClangBuiltin<"__builtin_ia32_punpckhdq">,
+ def int_x86_mmx_punpckhdq :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpcklbw : ClangBuiltin<"__builtin_ia32_punpcklbw">,
+ def int_x86_mmx_punpcklbw :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpcklwd : ClangBuiltin<"__builtin_ia32_punpcklwd">,
+ def int_x86_mmx_punpcklwd :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpckldq : ClangBuiltin<"__builtin_ia32_punpckldq">,
+ def int_x86_mmx_punpckldq :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
// Integer comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_pcmpeq_b : ClangBuiltin<"__builtin_ia32_pcmpeqb">,
+ def int_x86_mmx_pcmpeq_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpeq_w : ClangBuiltin<"__builtin_ia32_pcmpeqw">,
+ def int_x86_mmx_pcmpeq_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpeq_d : ClangBuiltin<"__builtin_ia32_pcmpeqd">,
+ def int_x86_mmx_pcmpeq_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpgt_b : ClangBuiltin<"__builtin_ia32_pcmpgtb">,
+ def int_x86_mmx_pcmpgt_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_pcmpgt_w : ClangBuiltin<"__builtin_ia32_pcmpgtw">,
+ def int_x86_mmx_pcmpgt_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_pcmpgt_d : ClangBuiltin<"__builtin_ia32_pcmpgtd">,
+ def int_x86_mmx_pcmpgt_d :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_maskmovq : ClangBuiltin<"__builtin_ia32_maskmovq">,
+ def int_x86_mmx_maskmovq :
Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
- def int_x86_mmx_pmovmskb : ClangBuiltin<"__builtin_ia32_pmovmskb">,
+ def int_x86_mmx_pmovmskb :
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_movnt_dq : ClangBuiltin<"__builtin_ia32_movntq">,
+ def int_x86_mmx_movnt_dq :
Intrinsic<[], [llvm_ptr_ty, llvm_x86mmx_ty], []>;
- def int_x86_mmx_palignr_b : ClangBuiltin<"__builtin_ia32_palignr">,
+ def int_x86_mmx_palignr_b :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty],
[llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_mmx_pextr_w : ClangBuiltin<"__builtin_ia32_vec_ext_v4hi">,
+ def int_x86_mmx_pextr_w :
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_mmx_pinsr_w : ClangBuiltin<"__builtin_ia32_vec_set_v4hi">,
+ def int_x86_mmx_pinsr_w :
DefaultAttrsIntrinsic<[llvm_x86mmx_ty],
[llvm_x86mmx_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;