mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 00:36:06 +00:00
[X86] LowerSelect - use BLENDV for scalar selection on all SSE41+ targets (#125853)
When we first began (2015) to lower f32/f64 selects to X86ISD::BLENDV(scalar_to_vector(),scalar_to_vector(),scalar_to_vector()), we limited it to AVX targets to avoid issues with SSE41's xmm0 constraint for the condition mask. Since then we've seen general improvements in TwoAddressInstruction and better handling of condition commutation for X86ISD::BLENDV nodes, which should address many of the original concerns of using SSE41 BLENDVPD/S. In most cases we will replace 3 logic instructions with the BLENDV node and (up to 3) additional moves. Although the BLENDV is often more expensive on original SSE41 targets, this should still be an improvement in a majority of cases. We also have no equivalent restrictions for SSE41 for v2f64/v4f32 vector selection. Fixes #105807
This commit is contained in:
parent
f845497f3b
commit
d9183fd96e
@ -24648,19 +24648,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
|
||||
DAG.getTargetConstant(SSECC, DL, MVT::i8));
|
||||
|
||||
// If we have AVX, we can use a variable vector select (VBLENDV) instead
|
||||
// of 3 logic instructions for size savings and potentially speed.
|
||||
// If we have SSE41/AVX, we can use a variable vector select (VBLENDV)
|
||||
// instead of 3 logic instructions for size savings and potentially speed.
|
||||
// Unfortunately, there is no scalar form of VBLENDV.
|
||||
|
||||
//
|
||||
// If either operand is a +0.0 constant, don't try this. We can expect to
|
||||
// optimize away at least one of the logic instructions later in that
|
||||
// case, so that sequence would be faster than a variable blend.
|
||||
|
||||
// BLENDV was introduced with SSE 4.1, but the 2 register form implicitly
|
||||
// uses XMM0 as the selection register. That may need just as many
|
||||
// instructions as the AND/ANDN/OR sequence due to register moves, so
|
||||
// don't bother.
|
||||
if (Subtarget.hasAVX() && !isNullFPConstant(Op1) &&
|
||||
if (Subtarget.hasSSE41() && !isNullFPConstant(Op1) &&
|
||||
!isNullFPConstant(Op2)) {
|
||||
// Convert to vectors, do a VSELECT, and convert back to scalar.
|
||||
// All of the conversions should be optimized away.
|
||||
|
@ -22,17 +22,26 @@ declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
|
||||
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
|
||||
|
||||
define float @test_fmaxf(float %x, float %y) {
|
||||
; SSE-LABEL: test_fmaxf:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-NEXT: andps %xmm1, %xmm3
|
||||
; SSE-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE-NEXT: orps %xmm3, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_fmaxf:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE2-NEXT: andps %xmm1, %xmm3
|
||||
; SSE2-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE2-NEXT: orps %xmm3, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_fmaxf:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE4-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_fmaxf:
|
||||
; AVX1: # %bb.0:
|
||||
@ -63,17 +72,26 @@ define float @test_fmaxf_minsize(float %x, float %y) minsize {
|
||||
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
|
||||
|
||||
define double @test_fmax(double %x, double %y) {
|
||||
; SSE-LABEL: test_fmax:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_fmax:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE2-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE2-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_fmax:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE4-NEXT: maxsd %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordsd %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_fmax:
|
||||
; AVX1: # %bb.0:
|
||||
@ -111,17 +129,26 @@ define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
|
||||
}
|
||||
|
||||
define float @test_intrinsic_fmaxf(float %x, float %y) {
|
||||
; SSE-LABEL: test_intrinsic_fmaxf:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-NEXT: andps %xmm1, %xmm3
|
||||
; SSE-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE-NEXT: orps %xmm3, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_intrinsic_fmaxf:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE2-NEXT: andps %xmm1, %xmm3
|
||||
; SSE2-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE2-NEXT: orps %xmm3, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_intrinsic_fmaxf:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE4-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_intrinsic_fmaxf:
|
||||
; AVX1: # %bb.0:
|
||||
@ -142,17 +169,26 @@ define float @test_intrinsic_fmaxf(float %x, float %y) {
|
||||
}
|
||||
|
||||
define double @test_intrinsic_fmax(double %x, double %y) {
|
||||
; SSE-LABEL: test_intrinsic_fmax:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_intrinsic_fmax:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE2-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE2-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_intrinsic_fmax:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE4-NEXT: maxsd %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordsd %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_intrinsic_fmax:
|
||||
; AVX1: # %bb.0:
|
||||
|
@ -22,17 +22,26 @@ declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
|
||||
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
|
||||
|
||||
define float @test_fminf(float %x, float %y) {
|
||||
; SSE-LABEL: test_fminf:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-NEXT: andps %xmm1, %xmm3
|
||||
; SSE-NEXT: minss %xmm0, %xmm1
|
||||
; SSE-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE-NEXT: orps %xmm3, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_fminf:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE2-NEXT: andps %xmm1, %xmm3
|
||||
; SSE2-NEXT: minss %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE2-NEXT: orps %xmm3, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_fminf:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE4-NEXT: minss %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_fminf:
|
||||
; AVX1: # %bb.0:
|
||||
@ -63,17 +72,26 @@ define float @test_fminf_minsize(float %x, float %y) minsize {
|
||||
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
|
||||
|
||||
define double @test_fmin(double %x, double %y) {
|
||||
; SSE-LABEL: test_fmin:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE-NEXT: minsd %xmm0, %xmm1
|
||||
; SSE-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_fmin:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE2-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE2-NEXT: minsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_fmin:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE4-NEXT: minsd %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordsd %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_fmin:
|
||||
; AVX1: # %bb.0:
|
||||
@ -111,17 +129,26 @@ define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
|
||||
}
|
||||
|
||||
define float @test_intrinsic_fminf(float %x, float %y) {
|
||||
; SSE-LABEL: test_intrinsic_fminf:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-NEXT: andps %xmm1, %xmm3
|
||||
; SSE-NEXT: minss %xmm0, %xmm1
|
||||
; SSE-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE-NEXT: orps %xmm3, %xmm2
|
||||
; SSE-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_intrinsic_fminf:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE2-NEXT: andps %xmm1, %xmm3
|
||||
; SSE2-NEXT: minss %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE2-NEXT: orps %xmm3, %xmm2
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_intrinsic_fminf:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE4-NEXT: minss %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_intrinsic_fminf:
|
||||
; AVX1: # %bb.0:
|
||||
@ -142,17 +169,26 @@ define float @test_intrinsic_fminf(float %x, float %y) {
|
||||
}
|
||||
|
||||
define double @test_intrinsic_fmin(double %x, double %y) {
|
||||
; SSE-LABEL: test_intrinsic_fmin:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE-NEXT: minsd %xmm0, %xmm1
|
||||
; SSE-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_intrinsic_fmin:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE2-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE2-NEXT: minsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: orpd %xmm3, %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: test_intrinsic_fmin:
|
||||
; SSE4: # %bb.0:
|
||||
; SSE4-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE4-NEXT: minsd %xmm0, %xmm2
|
||||
; SSE4-NEXT: cmpunordsd %xmm0, %xmm0
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE4-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_intrinsic_fmin:
|
||||
; AVX1: # %bb.0:
|
||||
|
@ -189,10 +189,9 @@ define float @test17(float %a, float %b, float %c, float %eps) {
|
||||
; CHECK-LABEL: test17:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: cmpless %xmm0, %xmm3
|
||||
; CHECK-NEXT: andps %xmm3, %xmm2
|
||||
; CHECK-NEXT: andnps %xmm1, %xmm3
|
||||
; CHECK-NEXT: orps %xmm2, %xmm3
|
||||
; CHECK-NEXT: movaps %xmm3, %xmm0
|
||||
; CHECK-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = fcmp oge float %a, %eps
|
||||
%cond = select i1 %cmp, float %c, float %b
|
||||
@ -203,10 +202,9 @@ define double @test18(double %a, double %b, double %c, double %eps) {
|
||||
; CHECK-LABEL: test18:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: cmplesd %xmm0, %xmm3
|
||||
; CHECK-NEXT: andpd %xmm3, %xmm2
|
||||
; CHECK-NEXT: andnpd %xmm1, %xmm3
|
||||
; CHECK-NEXT: orpd %xmm2, %xmm3
|
||||
; CHECK-NEXT: movapd %xmm3, %xmm0
|
||||
; CHECK-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; CHECK-NEXT: movapd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%cmp = fcmp oge double %a, %eps
|
||||
%cond = select i1 %cmp, double %c, double %b
|
||||
|
@ -463,14 +463,23 @@ define <2 x double> @oge(<2 x double> %x) {
|
||||
; negative test - don't create an fneg to replace 0.0 operand
|
||||
|
||||
define double @ogt_no_fneg(double %x, double %y) {
|
||||
; CHECK-LABEL: ogt_no_fneg:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorpd %xmm2, %xmm2
|
||||
; CHECK-NEXT: cmpltsd %xmm0, %xmm2
|
||||
; CHECK-NEXT: andpd %xmm2, %xmm0
|
||||
; CHECK-NEXT: andnpd %xmm1, %xmm2
|
||||
; CHECK-NEXT: orpd %xmm2, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
; SSE2-LABEL: ogt_no_fneg:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: xorpd %xmm2, %xmm2
|
||||
; SSE2-NEXT: cmpltsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: andpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: orpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: ogt_no_fneg:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE41-NEXT: xorpd %xmm0, %xmm0
|
||||
; SSE41-NEXT: cmpltsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
%cmp = fcmp ogt double %x, 0.0
|
||||
%r = select i1 %cmp, double %x, double %y
|
||||
ret double %r
|
||||
@ -479,16 +488,27 @@ define double @ogt_no_fneg(double %x, double %y) {
|
||||
; negative test - can't change the setcc for non-zero constant
|
||||
|
||||
define double @ogt_no_zero(double %x) {
|
||||
; CHECK-LABEL: ogt_no_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
|
||||
; CHECK-NEXT: xorpd %xmm0, %xmm1
|
||||
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
|
||||
; CHECK-NEXT: cmpltsd %xmm0, %xmm2
|
||||
; CHECK-NEXT: andpd %xmm2, %xmm0
|
||||
; CHECK-NEXT: andnpd %xmm1, %xmm2
|
||||
; CHECK-NEXT: orpd %xmm2, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
; SSE2-LABEL: ogt_no_zero:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
|
||||
; SSE2-NEXT: xorpd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
|
||||
; SSE2-NEXT: cmpltsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: andpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm2
|
||||
; SSE2-NEXT: orpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: ogt_no_zero:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
|
||||
; SSE41-NEXT: xorpd %xmm0, %xmm2
|
||||
; SSE41-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
|
||||
; SSE41-NEXT: cmpltsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
%neg = fneg double %x
|
||||
%cmp = fcmp ogt double %x, 1.0
|
||||
%r = select i1 %cmp, double %x, double %neg
|
||||
|
@ -80,11 +80,11 @@ define double @olt_inverse(double %x, double %y) {
|
||||
define double @oge(double %x, double %y) {
|
||||
; STRICT-LABEL: oge:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm0, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: cmplesd %xmm2, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: oge:
|
||||
@ -100,10 +100,9 @@ define double @ole(double %x, double %y) {
|
||||
; STRICT-LABEL: ole:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: ole:
|
||||
@ -118,11 +117,10 @@ define double @ole(double %x, double %y) {
|
||||
define double @oge_inverse(double %x, double %y) {
|
||||
; STRICT-LABEL: oge_inverse:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm0, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: cmplesd %xmm2, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -145,10 +143,8 @@ define double @ole_inverse(double %x, double %y) {
|
||||
; STRICT-LABEL: ole_inverse:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -333,10 +329,9 @@ define double @ugt(double %x, double %y) {
|
||||
; STRICT-LABEL: ugt:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: ugt:
|
||||
@ -351,11 +346,11 @@ define double @ugt(double %x, double %y) {
|
||||
define double @ult(double %x, double %y) {
|
||||
; STRICT-LABEL: ult:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm0, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: cmpnlesd %xmm2, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: ult:
|
||||
@ -371,10 +366,8 @@ define double @ugt_inverse(double %x, double %y) {
|
||||
; STRICT-LABEL: ugt_inverse:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -396,11 +389,10 @@ define double @ugt_inverse(double %x, double %y) {
|
||||
define double @ult_inverse(double %x, double %y) {
|
||||
; STRICT-LABEL: ult_inverse:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm0, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: cmpnlesd %xmm2, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -738,12 +730,12 @@ define double @olt_inverse_y(double %x) {
|
||||
define double @oge_y(double %x) {
|
||||
; STRICT-LABEL: oge_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm0, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: oge_y:
|
||||
@ -758,12 +750,11 @@ define double @oge_y(double %x) {
|
||||
define double @ole_y(double %x) {
|
||||
; STRICT-LABEL: ole_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: cmplesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; STRICT-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: ole_y:
|
||||
@ -778,12 +769,10 @@ define double @ole_y(double %x) {
|
||||
define double @oge_inverse_y(double %x) {
|
||||
; STRICT-LABEL: oge_inverse_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm2 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm1
|
||||
; STRICT-NEXT: cmplesd %xmm0, %xmm1
|
||||
; STRICT-NEXT: andpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm1
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: cmplesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -806,12 +795,9 @@ define double @oge_inverse_y(double %x) {
|
||||
define double @ole_inverse_y(double %x) {
|
||||
; STRICT-LABEL: ole_inverse_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm2 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: cmplesd %xmm2, %xmm1
|
||||
; STRICT-NEXT: andpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm1
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: cmplesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -834,12 +820,11 @@ define double @ole_inverse_y(double %x) {
|
||||
define double @ugt_y(double %x) {
|
||||
; STRICT-LABEL: ugt_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: cmpnlesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; STRICT-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: ugt_y:
|
||||
@ -854,12 +839,12 @@ define double @ugt_y(double %x) {
|
||||
define double @ult_y(double %x) {
|
||||
; STRICT-LABEL: ult_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm2
|
||||
; STRICT-NEXT: cmpnlesd %xmm0, %xmm2
|
||||
; STRICT-NEXT: andpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: andnpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm0
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: movapd {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
|
||||
; STRICT-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; RELAX-LABEL: ult_y:
|
||||
@ -874,12 +859,9 @@ define double @ult_y(double %x) {
|
||||
define double @ugt_inverse_y(double %x) {
|
||||
; STRICT-LABEL: ugt_inverse_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm2 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: cmpnlesd %xmm2, %xmm1
|
||||
; STRICT-NEXT: andpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm1
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: cmpnlesd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
@ -902,12 +884,10 @@ define double @ugt_inverse_y(double %x) {
|
||||
define double @ult_inverse_y(double %x) {
|
||||
; STRICT-LABEL: ult_inverse_y:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm2 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: movapd %xmm2, %xmm1
|
||||
; STRICT-NEXT: cmpnlesd %xmm0, %xmm1
|
||||
; STRICT-NEXT: andpd %xmm1, %xmm2
|
||||
; STRICT-NEXT: andnpd %xmm0, %xmm1
|
||||
; STRICT-NEXT: orpd %xmm2, %xmm1
|
||||
; STRICT-NEXT: movapd %xmm0, %xmm1
|
||||
; STRICT-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
|
||||
; STRICT-NEXT: cmpnlesd %xmm1, %xmm0
|
||||
; STRICT-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; STRICT-NEXT: movapd %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
|
@ -1679,10 +1679,9 @@ define <4 x float> @floor_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x flo
|
||||
; SSE41: ## %bb.0:
|
||||
; SSE41-NEXT: roundss $9, %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpeqss %xmm1, %xmm0
|
||||
; SSE41-NEXT: andps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: floor_mask_ss_mask8:
|
||||
@ -1747,10 +1746,9 @@ define <2 x double> @floor_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x
|
||||
; SSE41: ## %bb.0:
|
||||
; SSE41-NEXT: roundsd $9, %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: andpd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: floor_mask_sd_mask8:
|
||||
@ -2671,10 +2669,9 @@ define <4 x float> @ceil_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x floa
|
||||
; SSE41: ## %bb.0:
|
||||
; SSE41-NEXT: roundss $10, %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpeqss %xmm1, %xmm0
|
||||
; SSE41-NEXT: andps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: ceil_mask_ss_mask8:
|
||||
@ -2739,10 +2736,9 @@ define <2 x double> @ceil_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x d
|
||||
; SSE41: ## %bb.0:
|
||||
; SSE41-NEXT: roundsd $10, %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: andpd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: ceil_mask_sd_mask8:
|
||||
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512,AVX512BW
|
||||
@ -36,13 +36,10 @@ define float @test_v2f32(<2 x float> %a0) {
|
||||
; SSE41-LABEL: test_v2f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: andps %xmm2, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm1
|
||||
; SSE41-NEXT: orps %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -89,21 +86,19 @@ define float @test_v3f32(<3 x float> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v3f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE41-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE41-NEXT: orps %xmm3, %xmm2
|
||||
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm1
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: andnps %xmm1, %xmm2
|
||||
; SSE41-NEXT: orps %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v3f32:
|
||||
@ -166,31 +161,26 @@ define float @test_v4f32(<4 x float> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm3
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
|
||||
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm4
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f32:
|
||||
@ -266,35 +256,30 @@ define float @test_v8f32(<8 x float> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v8f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: maxps %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordps %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm1
|
||||
; SSE41-NEXT: andnps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm4
|
||||
; SSE41-NEXT: andps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; SSE41-NEXT: maxps %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordps %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm1, %xmm3
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: maxss %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm3[1]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f32:
|
||||
@ -458,36 +443,31 @@ define float @test_v16f32(<16 x float> %a0) {
|
||||
; SSE41-NEXT: cmpunordps %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxps %xmm4, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: maxps %xmm4, %xmm3
|
||||
; SSE41-NEXT: cmpunordps %xmm4, %xmm4
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andps %xmm2, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm3[1]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm4
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v16f32:
|
||||
@ -664,19 +644,30 @@ define float @test_v16f32(<16 x float> %a0) {
|
||||
;
|
||||
|
||||
define double @test_v2f64(<2 x double> %a0) {
|
||||
; SSE-LABEL: test_v2f64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE-NEXT: cmpunordsd %xmm0, %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE-NEXT: maxsd %xmm0, %xmm2
|
||||
; SSE-NEXT: andnpd %xmm2, %xmm1
|
||||
; SSE-NEXT: orpd %xmm3, %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_v2f64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE2-NEXT: cmpunordsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE2-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE2-NEXT: maxsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: andnpd %xmm2, %xmm1
|
||||
; SSE2-NEXT: orpd %xmm3, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test_v2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordsd %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v2f64:
|
||||
; AVX: # %bb.0:
|
||||
@ -724,15 +715,14 @@ define double @test_v4f64(<4 x double> %a0) {
|
||||
; SSE41-NEXT: maxpd %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordpd %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE41-NEXT: maxsd %xmm2, %xmm1
|
||||
; SSE41-NEXT: andnpd %xmm1, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f64:
|
||||
@ -820,15 +810,14 @@ define double @test_v8f64(<8 x double> %a0) {
|
||||
; SSE41-NEXT: cmpunordpd %xmm4, %xmm4
|
||||
; SSE41-NEXT: movapd %xmm4, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: maxsd %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f64:
|
||||
@ -1012,15 +1001,14 @@ define double @test_v16f64(<16 x double> %a0) {
|
||||
; SSE41-NEXT: cmpunordpd %xmm2, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: maxsd %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v16f64:
|
||||
|
@ -49,22 +49,19 @@ define float @test_v2f32(<2 x float> %a0) {
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: js .LBB1_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: js .LBB1_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: jmp .LBB1_3
|
||||
; SSE41-NEXT: .LBB1_1:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: .LBB1_2:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB1_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE41-NEXT: .LBB1_4:
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm1
|
||||
; SSE41-NEXT: orps %xmm4, %xmm1
|
||||
; SSE41-NEXT: .LBB1_3:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -171,65 +168,57 @@ define float @test_v4f32(<4 x float> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: js .LBB2_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: js .LBB2_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB2_3
|
||||
; SSE41-NEXT: .LBB2_1:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: .LBB2_3:
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: .LBB2_2:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm4, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm5
|
||||
; SSE41-NEXT: andps %xmm4, %xmm5
|
||||
; SSE41-NEXT: js .LBB2_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: .LBB2_4:
|
||||
; SSE41-NEXT: maxss %xmm3, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm4, %xmm2
|
||||
; SSE41-NEXT: orps %xmm5, %xmm2
|
||||
; SSE41-NEXT: movd %xmm2, %eax
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm4
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm4
|
||||
; SSE41-NEXT: movd %xmm4, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm4
|
||||
; SSE41-NEXT: js .LBB2_6
|
||||
; SSE41-NEXT: js .LBB2_4
|
||||
; SSE41-NEXT: # %bb.5:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm4
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB2_6
|
||||
; SSE41-NEXT: .LBB2_4:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: .LBB2_6:
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm4, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm5
|
||||
; SSE41-NEXT: andps %xmm4, %xmm5
|
||||
; SSE41-NEXT: js .LBB2_8
|
||||
; SSE41-NEXT: # %bb.7:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: .LBB2_8:
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm4, %xmm3
|
||||
; SSE41-NEXT: orps %xmm5, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
|
||||
; SSE41-NEXT: movd %xmm3, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE41-NEXT: js .LBB2_10
|
||||
; SSE41-NEXT: # %bb.9:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE41-NEXT: .LBB2_10:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm4
|
||||
; SSE41-NEXT: andps %xmm2, %xmm4
|
||||
; SSE41-NEXT: js .LBB2_12
|
||||
; SSE41-NEXT: # %bb.11:
|
||||
; SSE41-NEXT: js .LBB2_7
|
||||
; SSE41-NEXT: # %bb.8:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: .LBB2_12:
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm1
|
||||
; SSE41-NEXT: orps %xmm4, %xmm1
|
||||
; SSE41-NEXT: jmp .LBB2_9
|
||||
; SSE41-NEXT: .LBB2_7:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: .LBB2_9:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f32:
|
||||
@ -410,61 +399,53 @@ define float @test_v8f32(<8 x float> %a0) {
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
|
||||
; SSE41-NEXT: movd %xmm2, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: js .LBB3_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: .LBB3_2:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB3_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: js .LBB3_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB3_3
|
||||
; SSE41-NEXT: .LBB3_1:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: .LBB3_4:
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
|
||||
; SSE41-NEXT: .LBB3_3:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movd %xmm3, %eax
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: js .LBB3_6
|
||||
; SSE41-NEXT: js .LBB3_4
|
||||
; SSE41-NEXT: # %bb.5:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB3_6
|
||||
; SSE41-NEXT: .LBB3_4:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: .LBB3_6:
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm4, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm5
|
||||
; SSE41-NEXT: andps %xmm4, %xmm5
|
||||
; SSE41-NEXT: js .LBB3_8
|
||||
; SSE41-NEXT: # %bb.7:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: .LBB3_8:
|
||||
; SSE41-NEXT: maxss %xmm3, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm4, %xmm1
|
||||
; SSE41-NEXT: orps %xmm5, %xmm1
|
||||
; SSE41-NEXT: movd %xmm1, %eax
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movd %xmm3, %eax
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: js .LBB3_10
|
||||
; SSE41-NEXT: # %bb.9:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: .LBB3_10:
|
||||
; SSE41-NEXT: js .LBB3_7
|
||||
; SSE41-NEXT: # %bb.8:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB3_12
|
||||
; SSE41-NEXT: # %bb.11:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: .LBB3_12:
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB3_9
|
||||
; SSE41-NEXT: .LBB3_7:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE41-NEXT: .LBB3_9:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f32:
|
||||
@ -747,73 +728,65 @@ define float @test_v16f32(<16 x float> %a0) {
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm5, %xmm2
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm5, %xmm3
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm5
|
||||
; SSE41-NEXT: movaps %xmm5, %xmm1
|
||||
; SSE41-NEXT: maxps %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm5, %xmm2
|
||||
; SSE41-NEXT: maxps %xmm3, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm5, %xmm0
|
||||
; SSE41-NEXT: cmpunordps %xmm5, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm5, %xmm1
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
|
||||
; SSE41-NEXT: movd %xmm1, %eax
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: js .LBB4_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: .LBB4_2:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB4_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: .LBB4_4:
|
||||
; SSE41-NEXT: maxss %xmm2, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: js .LBB4_6
|
||||
; SSE41-NEXT: # %bb.5:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: .LBB4_6:
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm4, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm5
|
||||
; SSE41-NEXT: andps %xmm4, %xmm5
|
||||
; SSE41-NEXT: js .LBB4_8
|
||||
; SSE41-NEXT: # %bb.7:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: .LBB4_8:
|
||||
; SSE41-NEXT: maxss %xmm3, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm4, %xmm2
|
||||
; SSE41-NEXT: orps %xmm5, %xmm2
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm5, %xmm2
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
|
||||
; SSE41-NEXT: movd %xmm2, %eax
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: js .LBB4_10
|
||||
; SSE41-NEXT: # %bb.9:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: .LBB4_10:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB4_12
|
||||
; SSE41-NEXT: # %bb.11:
|
||||
; SSE41-NEXT: js .LBB4_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB4_3
|
||||
; SSE41-NEXT: .LBB4_1:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: .LBB4_12:
|
||||
; SSE41-NEXT: maxss %xmm1, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: .LBB4_3:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movd %xmm3, %eax
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: js .LBB4_4
|
||||
; SSE41-NEXT: # %bb.5:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB4_6
|
||||
; SSE41-NEXT: .LBB4_4:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: .LBB4_6:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movd %xmm3, %eax
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; SSE41-NEXT: testl %eax, %eax
|
||||
; SSE41-NEXT: js .LBB4_7
|
||||
; SSE41-NEXT: # %bb.8:
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB4_9
|
||||
; SSE41-NEXT: .LBB4_7:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE41-NEXT: .LBB4_9:
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxss %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v16f32:
|
||||
@ -986,30 +959,52 @@ define float @test_v16f32(<16 x float> %a0) {
|
||||
;
|
||||
|
||||
define double @test_v2f64(<2 x double> %a0) {
|
||||
; SSE-LABEL: test_v2f64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE-NEXT: movq %xmm0, %rax
|
||||
; SSE-NEXT: testq %rax, %rax
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE-NEXT: js .LBB5_2
|
||||
; SSE-NEXT: # %bb.1:
|
||||
; SSE-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE-NEXT: .LBB5_2:
|
||||
; SSE-NEXT: movdqa %xmm3, %xmm1
|
||||
; SSE-NEXT: cmpunordsd %xmm3, %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm4
|
||||
; SSE-NEXT: andpd %xmm3, %xmm4
|
||||
; SSE-NEXT: js .LBB5_4
|
||||
; SSE-NEXT: # %bb.3:
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE-NEXT: .LBB5_4:
|
||||
; SSE-NEXT: maxsd %xmm2, %xmm3
|
||||
; SSE-NEXT: andnpd %xmm3, %xmm1
|
||||
; SSE-NEXT: orpd %xmm4, %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_v2f64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE2-NEXT: js .LBB5_2
|
||||
; SSE2-NEXT: # %bb.1:
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE2-NEXT: .LBB5_2:
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm1
|
||||
; SSE2-NEXT: cmpunordsd %xmm3, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm4
|
||||
; SSE2-NEXT: andpd %xmm3, %xmm4
|
||||
; SSE2-NEXT: js .LBB5_4
|
||||
; SSE2-NEXT: # %bb.3:
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: .LBB5_4:
|
||||
; SSE2-NEXT: maxsd %xmm2, %xmm3
|
||||
; SSE2-NEXT: andnpd %xmm3, %xmm1
|
||||
; SSE2-NEXT: orpd %xmm4, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test_v2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE41-NEXT: movq %xmm0, %rax
|
||||
; SSE41-NEXT: testq %rax, %rax
|
||||
; SSE41-NEXT: js .LBB5_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: jmp .LBB5_3
|
||||
; SSE41-NEXT: .LBB5_1:
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: .LBB5_3:
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxsd %xmm3, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v2f64:
|
||||
; AVX: # %bb.0:
|
||||
@ -1092,34 +1087,32 @@ define double @test_v4f64(<4 x double> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v4f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: maxpd %xmm3, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: maxpd %xmm2, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordpd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: movq %xmm2, %rax
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm3[1]
|
||||
; SSE41-NEXT: movq %xmm3, %rax
|
||||
; SSE41-NEXT: testq %rax, %rax
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE41-NEXT: js .LBB6_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: .LBB6_2:
|
||||
; SSE41-NEXT: js .LBB6_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm3, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm4
|
||||
; SSE41-NEXT: andpd %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB6_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: jmp .LBB6_3
|
||||
; SSE41-NEXT: .LBB6_1:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: .LBB6_3:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: .LBB6_4:
|
||||
; SSE41-NEXT: maxsd %xmm1, %xmm3
|
||||
; SSE41-NEXT: andnpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm4, %xmm0
|
||||
; SSE41-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f64:
|
||||
@ -1310,22 +1303,20 @@ define double @test_v8f64(<8 x double> %a0) {
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movq %xmm1, %rax
|
||||
; SSE41-NEXT: testq %rax, %rax
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: js .LBB7_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE41-NEXT: .LBB7_2:
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm3, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm4
|
||||
; SSE41-NEXT: andpd %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB7_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: js .LBB7_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB7_3
|
||||
; SSE41-NEXT: .LBB7_1:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: .LBB7_4:
|
||||
; SSE41-NEXT: maxsd %xmm2, %xmm3
|
||||
; SSE41-NEXT: andnpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm4, %xmm0
|
||||
; SSE41-NEXT: .LBB7_3:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f64:
|
||||
@ -1646,22 +1637,20 @@ define double @test_v16f64(<16 x double> %a0) {
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movq %xmm1, %rax
|
||||
; SSE41-NEXT: testq %rax, %rax
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: js .LBB8_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE41-NEXT: .LBB8_2:
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm3, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm4
|
||||
; SSE41-NEXT: andpd %xmm3, %xmm4
|
||||
; SSE41-NEXT: js .LBB8_4
|
||||
; SSE41-NEXT: # %bb.3:
|
||||
; SSE41-NEXT: js .LBB8_1
|
||||
; SSE41-NEXT: # %bb.2:
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: jmp .LBB8_3
|
||||
; SSE41-NEXT: .LBB8_1:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: .LBB8_4:
|
||||
; SSE41-NEXT: maxsd %xmm2, %xmm3
|
||||
; SSE41-NEXT: andnpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm4, %xmm0
|
||||
; SSE41-NEXT: .LBB8_3:
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: maxsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v16f64:
|
||||
@ -1792,3 +1781,5 @@ declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
|
||||
declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
|
||||
declare double @llvm.vector.reduce.fmaximum.v8f64(<8 x double>)
|
||||
declare double @llvm.vector.reduce.fmaximum.v16f64(<16 x double>)
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; SSE: {{.*}}
|
||||
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512,AVX512BW
|
||||
@ -36,13 +36,10 @@ define float @test_v2f32(<2 x float> %a0) {
|
||||
; SSE41-LABEL: test_v2f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: andps %xmm2, %xmm3
|
||||
; SSE41-NEXT: minss %xmm0, %xmm2
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm1
|
||||
; SSE41-NEXT: orps %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: minss %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -99,31 +96,26 @@ define float @test_v4f32(<4 x float> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v4f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andps %xmm3, %xmm4
|
||||
; SSE41-NEXT: minss %xmm1, %xmm3
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
|
||||
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: minss %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm4
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: minss %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: minss %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: minss %xmm1, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: minss %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f32:
|
||||
@ -199,35 +191,30 @@ define float @test_v8f32(<8 x float> %a0) {
|
||||
;
|
||||
; SSE41-LABEL: test_v8f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: minps %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordps %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm3
|
||||
; SSE41-NEXT: minss %xmm2, %xmm1
|
||||
; SSE41-NEXT: andnps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
; SSE41-NEXT: minss %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm4
|
||||
; SSE41-NEXT: andps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; SSE41-NEXT: minps %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordps %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: minss %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm1, %xmm3
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: minss %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm3[1]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: minss %xmm1, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: minss %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f32:
|
||||
@ -391,36 +378,31 @@ define float @test_v16f32(<16 x float> %a0) {
|
||||
; SSE41-NEXT: cmpunordps %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: minps %xmm4, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: minps %xmm4, %xmm3
|
||||
; SSE41-NEXT: cmpunordps %xmm4, %xmm4
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: minss %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm3, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andps %xmm2, %xmm3
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm4
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm3[1]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm2
|
||||
; SSE41-NEXT: minss %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE41-NEXT: minss %xmm0, %xmm3
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE41-NEXT: andnps %xmm3, %xmm4
|
||||
; SSE41-NEXT: andps %xmm2, %xmm0
|
||||
; SSE41-NEXT: orps %xmm4, %xmm0
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: minss %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordss %xmm0, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm0, %xmm3
|
||||
; SSE41-NEXT: andnps %xmm2, %xmm3
|
||||
; SSE41-NEXT: andps %xmm1, %xmm0
|
||||
; SSE41-NEXT: orps %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordss %xmm1, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE41-NEXT: minss %xmm2, %xmm1
|
||||
; SSE41-NEXT: cmpunordss %xmm2, %xmm2
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v16f32:
|
||||
@ -597,19 +579,30 @@ define float @test_v16f32(<16 x float> %a0) {
|
||||
;
|
||||
|
||||
define double @test_v2f64(<2 x double> %a0) {
|
||||
; SSE-LABEL: test_v2f64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE-NEXT: cmpunordsd %xmm0, %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE-NEXT: minsd %xmm0, %xmm2
|
||||
; SSE-NEXT: andnpd %xmm2, %xmm1
|
||||
; SSE-NEXT: orpd %xmm3, %xmm1
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: test_v2f64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm1
|
||||
; SSE2-NEXT: cmpunordsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE2-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE2-NEXT: minsd %xmm0, %xmm2
|
||||
; SSE2-NEXT: andnpd %xmm2, %xmm1
|
||||
; SSE2-NEXT: orpd %xmm3, %xmm1
|
||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: test_v2f64:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: minsd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordsd %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v2f64:
|
||||
; AVX: # %bb.0:
|
||||
@ -661,15 +654,14 @@ define double @test_v3f64(<3 x double> %a0) {
|
||||
; SSE41-NEXT: minpd %xmm0, %xmm1
|
||||
; SSE41-NEXT: cmpunordpd %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: minsd %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v3f64:
|
||||
@ -727,15 +719,14 @@ define double @test_v4f64(<4 x double> %a0) {
|
||||
; SSE41-NEXT: minpd %xmm0, %xmm2
|
||||
; SSE41-NEXT: cmpunordpd %xmm0, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm1, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE41-NEXT: minsd %xmm2, %xmm1
|
||||
; SSE41-NEXT: andnpd %xmm1, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm2, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f64:
|
||||
@ -823,15 +814,14 @@ define double @test_v8f64(<8 x double> %a0) {
|
||||
; SSE41-NEXT: cmpunordpd %xmm4, %xmm4
|
||||
; SSE41-NEXT: movapd %xmm4, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: minsd %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f64:
|
||||
@ -1015,15 +1005,14 @@ define double @test_v16f64(<16 x double> %a0) {
|
||||
; SSE41-NEXT: cmpunordpd %xmm2, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm2
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm0
|
||||
; SSE41-NEXT: movapd %xmm0, %xmm3
|
||||
; SSE41-NEXT: andpd %xmm2, %xmm3
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm3
|
||||
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
|
||||
; SSE41-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE41-NEXT: minsd %xmm1, %xmm2
|
||||
; SSE41-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE41-NEXT: orpd %xmm3, %xmm0
|
||||
; SSE41-NEXT: cmpunordsd %xmm1, %xmm1
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v16f64:
|
||||
|
@ -113,14 +113,22 @@ define float @fsel_zero_true_val(float %a, float %b, float %x) {
|
||||
}
|
||||
|
||||
define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
|
||||
; SSE-LABEL: fsel_nonzero_false_val:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE-NEXT: andpd %xmm0, %xmm2
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
|
||||
; SSE-NEXT: andnpd %xmm1, %xmm0
|
||||
; SSE-NEXT: orpd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: fsel_nonzero_false_val:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: andpd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
|
||||
; SSE2-NEXT: andnpd %xmm1, %xmm0
|
||||
; SSE2-NEXT: orpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE42-LABEL: fsel_nonzero_false_val:
|
||||
; SSE42: # %bb.0:
|
||||
; SSE42-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE42-NEXT: movapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
|
||||
; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
|
||||
; SSE42-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE42-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: fsel_nonzero_false_val:
|
||||
; AVX: # %bb.0:
|
||||
@ -142,14 +150,21 @@ define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
|
||||
}
|
||||
|
||||
define double @fsel_nonzero_true_val(double %x, double %y, double %z) {
|
||||
; SSE-LABEL: fsel_nonzero_true_val:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
|
||||
; SSE-NEXT: andpd %xmm0, %xmm1
|
||||
; SSE-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE-NEXT: orpd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: fsel_nonzero_true_val:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
|
||||
; SSE2-NEXT: andpd %xmm0, %xmm1
|
||||
; SSE2-NEXT: andnpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: orpd %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE42-LABEL: fsel_nonzero_true_val:
|
||||
; SSE42: # %bb.0:
|
||||
; SSE42-NEXT: cmpeqsd %xmm1, %xmm0
|
||||
; SSE42-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
|
||||
; SSE42-NEXT: movapd %xmm2, %xmm0
|
||||
; SSE42-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: fsel_nonzero_true_val:
|
||||
; AVX: # %bb.0:
|
||||
|
Loading…
x
Reference in New Issue
Block a user