[DAG] Add legalization handling for ABDS/ABDU (#92576) (REAPPLIED)

Always match ABD patterns pre-legalization, and use TargetLowering::expandABD to expand again during legalization.

abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), usub_overflow(lhs, rhs)), usub_overflow(lhs, rhs))
Alive2: https://alive2.llvm.org/ce/z/dVdMyv

REAPPLIED: Fix regression issue with "abs(ext(x) - ext(y)) -> zext(abd(x, y))" fold failing after type legalization
Author: Simon Pilgrim
Date: 2024-08-08 11:38:55 +01:00
Parent: c4e77280f0
Commit: 13d04fa560
29 changed files with 3229 additions and 4107 deletions

View File

@ -4091,13 +4091,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// smax(a,b) - smin(a,b) --> abds(a,b)
if (hasOperation(ISD::ABDS, VT) &&
if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
return DAG.getNode(ISD::ABDS, DL, VT, A, B);
// umax(a,b) - umin(a,b) --> abdu(a,b)
if (hasOperation(ISD::ABDU, VT) &&
if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
return DAG.getNode(ISD::ABDU, DL, VT, A, B);
@ -5263,6 +5263,10 @@ SDValue DAGCombiner::visitABD(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (abd x, x) -> 0
if (N0 == N1)
return DAG.getConstant(0, DL, VT);
SDValue X;
// fold (abds x, 0) -> abs x
@ -10924,6 +10928,7 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
// Don't fold this for unsupported types as we lose the NSW handling.
if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
TLI.preferABDSToABSWithNSW(VT)) {
SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
@ -10946,7 +10951,8 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
if ((VT0 == MaxVT || Op0->hasOneUse()) &&
(VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
(VT1 == MaxVT || Op1->hasOneUse()) &&
(!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
@ -10956,7 +10962,7 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
// fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
// fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
if (hasOperation(ABDOpcode, VT)) {
if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
@ -11580,7 +11586,7 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
EVT VT = LHS.getValueType();
if (!hasOperation(ABDOpc, VT))
if (LegalOperations && !hasOperation(ABDOpc, VT))
return SDValue();
switch (CC) {

View File

@ -192,6 +192,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SUB:
case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::ABDS:
case ISD::AVGCEILS:
case ISD::AVGFLOORS:
case ISD::VP_SMIN:
@ -201,6 +202,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SDIV:
case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::ABDU:
case ISD::AVGCEILU:
case ISD::AVGFLOORU:
case ISD::VP_UMIN:
@ -2791,6 +2793,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::ABDS:
case ISD::ABDU: ExpandIntRes_ABD(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
@ -3850,6 +3854,11 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
Hi = DAG.getConstant(0, dl, NVT);
}
// Expand an ABDS/ABDU node whose integer result type is illegal (too wide for
// the target): delegate to TargetLowering::expandABD to build the absolute-
// difference computation out of legal-izable ops on the wide type, then split
// the wide result into Lo/Hi halves for the integer-expansion legalizer.
// NOTE(review): expandABD is assumed never to fail here (no null check on
// Result) — presumably its fallback select/sub expansion always applies;
// confirm against TargetLowering::expandABD.
void DAGTypeLegalizer::ExpandIntRes_ABD(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Result = TLI.expandABD(N, DAG);
SplitInteger(Result, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);

View File

@ -448,6 +448,7 @@ private:
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ABD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);

View File

@ -147,6 +147,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::FLDEXP:
case ISD::ABDS:
case ISD::ABDU:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@ -1233,6 +1235,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MUL: case ISD::VP_MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::ABDS:
case ISD::ABDU:
case ISD::AVGCEILS:
case ISD::AVGCEILU:
case ISD::AVGFLOORS:
@ -4368,6 +4372,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MUL: case ISD::VP_MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::ABDS:
case ISD::ABDU:
case ISD::OR: case ISD::VP_OR:
case ISD::SUB: case ISD::VP_SUB:
case ISD::XOR: case ISD::VP_XOR:

View File

@ -7024,6 +7024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
return getNode(ISD::XOR, DL, VT, N1, N2);
break;
case ISD::SMIN:
case ISD::UMAX:

View File

@ -9311,6 +9311,21 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
// If the subtract doesn't overflow then just use abs(sub())
// NOTE: don't use frozen operands for value tracking.
bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
DAG.SignBitIsZero(N->getOperand(0));
if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
N->getOperand(1)))
return DAG.getNode(ISD::ABS, dl, VT,
DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
N->getOperand(0)))
return DAG.getNode(ISD::ABS, dl, VT,
DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
@ -9324,6 +9339,23 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
}
// Similar to the branchless expansion, use the (sign-extended) usubo overflow
// flag if the (scalar) type is illegal as this is more likely to legalize
// cleanly:
// abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
SDValue USubO =
DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
}
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(N);
// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),

View File

@ -8,13 +8,10 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
%bext = sext i8 %b to i64
@ -28,13 +25,10 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
%bext = sext i16 %b to i64
@ -48,13 +42,10 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
%bext = sext i8 %b to i64
@ -68,13 +59,10 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i16 %b to i64
@ -88,11 +76,10 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: sub x8, x8, w1, sxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: csel w8, w8, w9, gt
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@ -107,13 +94,10 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i16 %b to i64
@ -127,11 +111,9 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: sub x8, x8, w1, sxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w8, w9, w8, gt
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
@ -146,12 +128,10 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: sxth w8, w1
; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
; CHECK-NEXT: csel w8, w8, w9, gt
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
@ -166,11 +146,9 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: sub x8, x8, w1, sxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w8, w9, w8, gt
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
@ -185,13 +163,10 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, #63
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x8, x9, x8, gt
; CHECK-NEXT: neg x0, x8
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@ -205,13 +180,10 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, #63
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x8, x9, x8, gt
; CHECK-NEXT: neg x0, x8
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@ -225,19 +197,15 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x1, #63
; CHECK-NEXT: asr x9, x3, #63
; CHECK-NEXT: subs x10, x0, x2
; CHECK-NEXT: sbcs x11, x1, x3
; CHECK-NEXT: sbcs xzr, x8, x9
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: eor x10, x11, x8
; CHECK-NEXT: subs x9, x9, x8
; CHECK-NEXT: sbc x8, x10, x8
; CHECK-NEXT: negs x0, x9
; CHECK-NEXT: ngc x1, x8
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbc x9, x1, x3
; CHECK-NEXT: subs x10, x2, x0
; CHECK-NEXT: sbc x11, x3, x1
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: csel x8, x8, x10, lt
; CHECK-NEXT: csel x9, x9, x11, lt
; CHECK-NEXT: negs x0, x8
; CHECK-NEXT: ngc x1, x9
; CHECK-NEXT: ret
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256
@ -251,19 +219,15 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x1, #63
; CHECK-NEXT: asr x9, x3, #63
; CHECK-NEXT: subs x10, x0, x2
; CHECK-NEXT: sbcs x11, x1, x3
; CHECK-NEXT: sbcs xzr, x8, x9
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: eor x10, x11, x8
; CHECK-NEXT: subs x9, x9, x8
; CHECK-NEXT: sbc x8, x10, x8
; CHECK-NEXT: negs x0, x9
; CHECK-NEXT: ngc x1, x8
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbc x9, x1, x3
; CHECK-NEXT: subs x10, x2, x0
; CHECK-NEXT: sbc x11, x3, x1
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: csel x8, x8, x10, lt
; CHECK-NEXT: csel x9, x9, x11, lt
; CHECK-NEXT: negs x0, x8
; CHECK-NEXT: ngc x1, x9
; CHECK-NEXT: ret
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256

View File

@ -8,13 +8,10 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
%bext = sext i8 %b to i64
@ -27,13 +24,10 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
%bext = sext i16 %b to i64
@ -46,13 +40,10 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
%bext = sext i8 %b to i64
@ -65,13 +56,10 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i16 %b to i64
@ -84,12 +72,10 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: sub x8, x8, w1, sxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@ -102,13 +88,10 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i16 %b to i64
@ -121,12 +104,9 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: sub x8, x8, w1, sxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w0, w9, w8, gt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@ -139,13 +119,10 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, sxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sxth w8, w1
; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@ -158,12 +135,9 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: sub x8, x8, w1, sxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w0, w9, w8, gt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@ -176,13 +150,9 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, #63
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@ -195,13 +165,9 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, #63
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@ -214,17 +180,14 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x1, #63
; CHECK-NEXT: asr x9, x3, #63
; CHECK-NEXT: subs x10, x0, x2
; CHECK-NEXT: sbcs x11, x1, x3
; CHECK-NEXT: sbcs xzr, x8, x9
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: eor x10, x11, x8
; CHECK-NEXT: subs x0, x9, x8
; CHECK-NEXT: sbc x1, x10, x8
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbc x8, x3, x1
; CHECK-NEXT: subs x9, x0, x2
; CHECK-NEXT: sbc x10, x1, x3
; CHECK-NEXT: subs x11, x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: csel x0, x9, x11, lt
; CHECK-NEXT: csel x1, x10, x8, lt
; CHECK-NEXT: ret
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256
@ -237,17 +200,14 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_ext_i128_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x1, #63
; CHECK-NEXT: asr x9, x3, #63
; CHECK-NEXT: subs x10, x0, x2
; CHECK-NEXT: sbcs x11, x1, x3
; CHECK-NEXT: sbcs xzr, x8, x9
; CHECK-NEXT: sbc x8, x8, x9
; CHECK-NEXT: asr x8, x8, #63
; CHECK-NEXT: eor x9, x10, x8
; CHECK-NEXT: eor x10, x11, x8
; CHECK-NEXT: subs x0, x9, x8
; CHECK-NEXT: sbc x1, x10, x8
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbc x8, x3, x1
; CHECK-NEXT: subs x9, x0, x2
; CHECK-NEXT: sbc x10, x1, x3
; CHECK-NEXT: subs x11, x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: csel x0, x9, x11, lt
; CHECK-NEXT: csel x1, x10, x8, lt
; CHECK-NEXT: ret
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256
@ -264,12 +224,10 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w1
; CHECK-NEXT: sxtb w9, w0
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: csel w10, w9, w8, lt
; CHECK-NEXT: csel w8, w9, w8, gt
; CHECK-NEXT: sub w0, w8, w10
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@ -280,12 +238,10 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w1
; CHECK-NEXT: sxth w9, w0
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: csel w10, w9, w8, lt
; CHECK-NEXT: csel w8, w9, w8, gt
; CHECK-NEXT: sub w0, w8, w10
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: sub w8, w8, w1, sxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@ -296,10 +252,9 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: csel w8, w0, w1, lt
; CHECK-NEXT: csel w9, w0, w1, gt
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w0, w9, w8, gt
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@ -310,10 +265,9 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, x1
; CHECK-NEXT: csel x8, x0, x1, lt
; CHECK-NEXT: csel x9, x0, x1, gt
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@ -324,16 +278,14 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_minmax_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, x2
; CHECK-NEXT: sbcs xzr, x1, x3
; CHECK-NEXT: csel x8, x1, x3, lt
; CHECK-NEXT: csel x9, x0, x2, lt
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbc x8, x3, x1
; CHECK-NEXT: subs x9, x0, x2
; CHECK-NEXT: sbc x10, x1, x3
; CHECK-NEXT: subs x11, x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: csel x10, x0, x2, lt
; CHECK-NEXT: csel x11, x1, x3, lt
; CHECK-NEXT: subs x0, x10, x9
; CHECK-NEXT: sbc x1, x11, x8
; CHECK-NEXT: csel x0, x9, x11, lt
; CHECK-NEXT: csel x1, x10, x8, lt
; CHECK-NEXT: ret
%min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
%max = call i128 @llvm.smax.i128(i128 %a, i128 %b)

View File

@ -8,13 +8,10 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@ -28,13 +25,10 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@ -48,13 +42,10 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@ -68,13 +59,10 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@ -88,11 +76,10 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: sub x8, x8, w1, uxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: csel w8, w8, w9, hi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@ -107,13 +94,10 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@ -127,10 +111,9 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: sub x8, x8, w1, uxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w8, w9, w8, hi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
@ -145,11 +128,10 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: and w8, w1, #0xffff
; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
; CHECK-NEXT: csel w8, w8, w9, hi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
@ -164,10 +146,9 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: sub x8, x8, w1, uxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x8, x8, mi
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w8, w9, w8, hi
; CHECK-NEXT: neg w0, w8
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
@ -182,10 +163,10 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
; CHECK-NEXT: ngc x9, xzr
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x8, x9, x8, hi
; CHECK-NEXT: neg x0, x8
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@ -199,10 +180,10 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
; CHECK-NEXT: ngc x9, xzr
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x8, x9, x8, hi
; CHECK-NEXT: neg x0, x8
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@ -218,8 +199,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: ngcs xzr, xzr
; CHECK-NEXT: ngc x10, xzr
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: sbfx x10, x10, #0, #1
; CHECK-NEXT: eor x8, x8, x10
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: subs x8, x8, x10
@ -241,8 +222,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: ngcs xzr, xzr
; CHECK-NEXT: ngc x10, xzr
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: sbfx x10, x10, #0, #1
; CHECK-NEXT: eor x8, x8, x10
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: subs x8, x8, x10

View File

@ -8,13 +8,10 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@ -27,13 +24,10 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@ -46,13 +40,10 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxtb
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@ -65,13 +56,10 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@ -84,12 +72,10 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: sub x8, x8, w1, uxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: csel w0, w8, w9, hi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@ -102,13 +88,10 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@ -121,11 +104,9 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: sub x8, x8, w1, uxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w0, w9, w8, hi
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@ -138,12 +119,10 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sub x8, x8, w1, uxth
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: and w8, w1, #0xffff
; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
; CHECK-NEXT: csel w0, w8, w9, hi
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@ -156,11 +135,9 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: sub x8, x8, w1, uxtw
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w0, w9, w8, hi
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@ -173,10 +150,9 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
; CHECK-NEXT: ngc x9, xzr
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@ -189,10 +165,9 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
; CHECK-NEXT: ngc x9, xzr
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@ -207,8 +182,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: ngcs xzr, xzr
; CHECK-NEXT: ngc x10, xzr
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: sbfx x10, x10, #0, #1
; CHECK-NEXT: eor x8, x8, x10
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: subs x0, x8, x10
@ -227,8 +202,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: ngcs xzr, xzr
; CHECK-NEXT: ngc x10, xzr
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: sbfx x10, x10, #0, #1
; CHECK-NEXT: eor x8, x8, x10
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: subs x0, x8, x10
@ -249,12 +224,10 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: csel w10, w9, w8, lo
; CHECK-NEXT: csel w8, w9, w8, hi
; CHECK-NEXT: sub w0, w8, w10
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: sub w8, w8, w1, uxtb
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.umax.i8(i8 %a, i8 %b)
@ -265,12 +238,10 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xffff
; CHECK-NEXT: and w9, w0, #0xffff
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: csel w10, w9, w8, lo
; CHECK-NEXT: csel w8, w9, w8, hi
; CHECK-NEXT: sub w0, w8, w10
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: sub w8, w8, w1, uxth
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
@ -281,10 +252,9 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: csel w8, w0, w1, lo
; CHECK-NEXT: csel w9, w0, w1, hi
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: sub w8, w1, w0
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: csel w0, w9, w8, hi
; CHECK-NEXT: ret
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@ -295,10 +265,9 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, x1
; CHECK-NEXT: csel x8, x0, x1, lo
; CHECK-NEXT: csel x9, x0, x1, hi
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
@ -309,16 +278,14 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_minmax_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, x2
; CHECK-NEXT: sbcs xzr, x1, x3
; CHECK-NEXT: csel x8, x1, x3, lo
; CHECK-NEXT: csel x9, x0, x2, lo
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: csel x10, x0, x2, lo
; CHECK-NEXT: csel x11, x1, x3, lo
; CHECK-NEXT: subs x0, x10, x9
; CHECK-NEXT: sbc x1, x11, x8
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: sbfx x10, x10, #0, #1
; CHECK-NEXT: eor x8, x8, x10
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: subs x0, x8, x10
; CHECK-NEXT: sbc x1, x9, x10
; CHECK-NEXT: ret
%min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
%max = call i128 @llvm.umax.i128(i128 %a, i128 %b)

View File

@ -1799,28 +1799,14 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: mov.d x9, v1[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: fmov x12, d1
; CHECK-NEXT: asr x14, x10, #63
; CHECK-NEXT: asr x11, x8, #63
; CHECK-NEXT: asr x13, x9, #63
; CHECK-NEXT: asr x15, x12, #63
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: sbc x9, x11, x13
; CHECK-NEXT: subs x10, x10, x12
; CHECK-NEXT: sbc x11, x14, x15
; CHECK-NEXT: asr x13, x9, #63
; CHECK-NEXT: asr x12, x11, #63
; CHECK-NEXT: eor x8, x8, x13
; CHECK-NEXT: eor x9, x9, x13
; CHECK-NEXT: eor x10, x10, x12
; CHECK-NEXT: eor x11, x11, x12
; CHECK-NEXT: subs x0, x10, x12
; CHECK-NEXT: sbc x1, x11, x12
; CHECK-NEXT: subs x2, x8, x13
; CHECK-NEXT: sbc x3, x9, x13
; CHECK-NEXT: cmgt.2d v2, v0, v1
; CHECK-NEXT: sub.2d v0, v0, v1
; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: mov x3, xzr
; CHECK-NEXT: eor.16b v0, v0, v2
; CHECK-NEXT: sub.2d v0, v2, v0
; CHECK-NEXT: mov.d x2, v0[1]
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%aext = sext <2 x i64> %a to <2 x i128>
%bext = sext <2 x i64> %b to <2 x i128>

View File

@ -49,10 +49,10 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sshr v1.4h, v1.4h, #8
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%a.sext = sext <4 x i8> %a to <4 x i16>
@ -103,10 +103,10 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%a.sext = sext <2 x i16> %a to <2 x i32>
@ -144,27 +144,10 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: sabd_2d:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: mov x9, v1.d[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: fmov x12, d1
; CHECK-NEXT: asr x14, x10, #63
; CHECK-NEXT: asr x11, x8, #63
; CHECK-NEXT: asr x13, x9, #63
; CHECK-NEXT: asr x15, x12, #63
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: sbc x9, x11, x13
; CHECK-NEXT: subs x10, x10, x12
; CHECK-NEXT: sbc x11, x14, x15
; CHECK-NEXT: asr x9, x9, #63
; CHECK-NEXT: asr x11, x11, #63
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: eor x10, x10, x11
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: sub x10, x10, x11
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT: ret
%a.sext = sext <2 x i64> %a to <2 x i128>
%b.sext = sext <2 x i64> %b to <2 x i128>
@ -232,8 +215,8 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: bic v1.4h, #255, lsl #8
; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%a.zext = zext <4 x i8> %a to <4 x i16>
@ -285,8 +268,8 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%a.zext = zext <2 x i16> %a to <2 x i32>
@ -324,21 +307,9 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: uabd_2d:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: mov x9, v1.d[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: ngc x9, xzr
; CHECK-NEXT: subs x10, x10, x11
; CHECK-NEXT: ngc x11, xzr
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: eor x10, x10, x11
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: sub x10, x10, x11
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%a.zext = zext <2 x i64> %a to <2 x i128>
%b.zext = zext <2 x i64> %b to <2 x i128>
@ -482,9 +453,8 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: smaxmin_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT: cmgt v3.2d, v1.2d, v0.2d
; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT: ret
%a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
@ -529,11 +499,9 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v3.2d, v1.2d, v0.2d
; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
%b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)

View File

@ -24,9 +24,10 @@ define <vscale x 16 x i8> @saba_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
define <vscale x 16 x i8> @saba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
; CHECK-LABEL: saba_b_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z2.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: saba z0.b, z1.b, z2.b
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: mov z1.b, #1 // =0x1
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
%b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
%c.sext = sext <vscale x 16 x i1> %c to <vscale x 16 x i8>
@ -75,8 +76,8 @@ define <vscale x 8 x i16> @saba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x
; CHECK-LABEL: saba_h_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
; CHECK-NEXT: sxtb z2.h, p0/m, z2.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
; CHECK-NEXT: saba z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@ -126,8 +127,8 @@ define <vscale x 4 x i32> @saba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x
; CHECK-LABEL: saba_s_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
; CHECK-NEXT: sxth z2.s, p0/m, z2.s
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
; CHECK-NEXT: saba z0.s, z1.s, z2.s
; CHECK-NEXT: ret
%b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@ -177,8 +178,8 @@ define <vscale x 2 x i64> @saba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x
; CHECK-LABEL: saba_d_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
; CHECK-NEXT: sxtw z2.d, p0/m, z2.d
; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
; CHECK-NEXT: saba z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@ -231,9 +232,10 @@ define <vscale x 16 x i8> @uaba_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
define <vscale x 16 x i8> @uaba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
; CHECK-LABEL: uaba_b_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT: mov z2.b, p1/z, #1 // =0x1
; CHECK-NEXT: uaba z0.b, z1.b, z2.b
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: mov z1.b, #1 // =0x1
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
%b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
%c.zext = zext <vscale x 16 x i1> %c to <vscale x 16 x i8>
@ -281,8 +283,8 @@ define <vscale x 8 x i16> @uaba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
define <vscale x 8 x i16> @uaba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) #0 {
; CHECK-LABEL: uaba_h_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z2.h, z2.h, #0xff
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: uaba z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@ -331,8 +333,8 @@ define <vscale x 4 x i32> @uaba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
define <vscale x 4 x i32> @uaba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) #0 {
; CHECK-LABEL: uaba_s_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: and z2.s, z2.s, #0xffff
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: uaba z0.s, z1.s, z2.s
; CHECK-NEXT: ret
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@ -381,8 +383,8 @@ define <vscale x 2 x i64> @uaba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b,
define <vscale x 2 x i64> @uaba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) #0 {
; CHECK-LABEL: uaba_d_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: uaba z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>

View File

@ -24,10 +24,9 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: sabd_b_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: ret
%a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
%b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@ -54,8 +53,8 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
; CHECK-LABEL: sabd_h_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
@ -83,8 +82,8 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
; CHECK-LABEL: sabd_s_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxth z0.s, p0/m, z0.s
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
; CHECK-NEXT: sxth z0.s, p0/m, z0.s
; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
@ -112,8 +111,8 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
; CHECK-LABEL: sabd_d_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
; CHECK-NEXT: sabd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
@ -144,10 +143,9 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: uabd_b_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
%a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
%b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@ -173,8 +171,8 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) #0 {
; CHECK-LABEL: uabd_h_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
@ -202,8 +200,8 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) #0 {
; CHECK-LABEL: uabd_s_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
@ -231,8 +229,8 @@ define <vscale x 2 x i64> @uabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) #0 {
; CHECK-LABEL: uabd_d_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
@ -265,8 +263,8 @@ define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <v
define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a, <vscale x 4 x i16> %b) #0 {
; CHECK-LABEL: uabd_non_matching_promoted_ops:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret

View File

@ -86,9 +86,9 @@ define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(ptr addrspace(1) %out, i
; GCN-NEXT: s_add_u32 s16, s16, s13
; GCN-NEXT: s_addc_u32 s17, s17, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_max_u32 s3, s0, s1
; GCN-NEXT: s_min_u32 s0, s0, s1
; GCN-NEXT: s_sub_i32 s0, s3, s0
; GCN-NEXT: s_min_u32 s3, s0, s1
; GCN-NEXT: s_max_u32 s0, s0, s1
; GCN-NEXT: s_sub_i32 s0, s0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v2, s0
; GCN-NEXT: s_add_i32 s0, s0, s2

View File

@ -142,35 +142,30 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_2d:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: vmov lr, r1, d1
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: vmov r12, r0, d0
; CHECK-NEXT: subs lr, lr, r2
; CHECK-NEXT: asr r4, r1, #31
; CHECK-NEXT: sbcs r1, r1, r3
; CHECK-NEXT: sbcs r2, r4, r3, asr #31
; CHECK-NEXT: vmov r2, r5, d2
; CHECK-NEXT: sbc r3, r4, r3, asr #31
; CHECK-NEXT: eor r4, lr, r3, asr #31
; CHECK-NEXT: eor r1, r1, r3, asr #31
; CHECK-NEXT: subs r4, r4, r3, asr #31
; CHECK-NEXT: sbc lr, r1, r3, asr #31
; CHECK-NEXT: asr r3, r0, #31
; CHECK-NEXT: vmov.32 d1[0], r4
; CHECK-NEXT: subs r2, r12, r2
; CHECK-NEXT: sbcs r0, r0, r5
; CHECK-NEXT: sbcs r1, r3, r5, asr #31
; CHECK-NEXT: sbc r1, r3, r5, asr #31
; CHECK-NEXT: eor r2, r2, r1, asr #31
; CHECK-NEXT: eor r0, r0, r1, asr #31
; CHECK-NEXT: subs r2, r2, r1, asr #31
; CHECK-NEXT: sbc r0, r0, r1, asr #31
; CHECK-NEXT: vmov.32 d0[0], r2
; CHECK-NEXT: vmov.32 d1[1], lr
; CHECK-NEXT: vmov.32 d0[1], r0
; CHECK-NEXT: pop {r4, r5, r11, pc}
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov r0, r12, d0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: vmov r1, lr, d1
; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r12
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: subs r1, r4, r1
; CHECK-NEXT: sbcs r1, r5, lr
; CHECK-NEXT: movwlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vdup.32 d19, r6
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: veor q8, q8, q9
; CHECK-NEXT: vsub.i64 q0, q9, q8
; CHECK-NEXT: pop {r4, r5, r6, pc}
%a.sext = sext <2 x i64> %a to <2 x i128>
%b.sext = sext <2 x i64> %b to <2 x i128>
%sub = sub <2 x i128> %a.sext, %b.sext
@ -329,34 +324,10 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_2d:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov r0, r12, d3
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov lr, r6, d2
; CHECK-NEXT: vmov r4, r5, d0
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r2, r3, r12
; CHECK-NEXT: sbcs r3, r1, #0
; CHECK-NEXT: sbc r3, r1, #0
; CHECK-NEXT: eor r0, r0, r3
; CHECK-NEXT: eor r2, r2, r3
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbc r2, r2, r3
; CHECK-NEXT: subs r3, r4, lr
; CHECK-NEXT: sbcs r6, r5, r6
; CHECK-NEXT: vmov.32 d1[0], r0
; CHECK-NEXT: sbcs r5, r1, #0
; CHECK-NEXT: sbc r1, r1, #0
; CHECK-NEXT: eor r3, r3, r1
; CHECK-NEXT: subs r0, r3, r1
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: eor r0, r6, r1
; CHECK-NEXT: sbc r0, r0, r1
; CHECK-NEXT: vmov.32 d1[1], r2
; CHECK-NEXT: vmov.32 d0[1], r0
; CHECK-NEXT: pop {r4, r5, r6, pc}
; CHECK-NEXT: vqsub.u64 q8, q1, q0
; CHECK-NEXT: vqsub.u64 q9, q0, q1
; CHECK-NEXT: vorr q0, q9, q8
; CHECK-NEXT: bx lr
%a.zext = zext <2 x i64> %a to <2 x i128>
%b.zext = zext <2 x i64> %b to <2 x i128>
%sub = sub <2 x i128> %a.zext, %b.zext
@ -502,56 +473,30 @@ define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: smaxmin_v2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: vmov r1, r0, d3
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: vmov r4, lr, d1
; CHECK-NEXT: vmov r6, r8, d0
; CHECK-NEXT: subs r2, r4, r1
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: sbcs r2, lr, r0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mov r2, r1
; CHECK-NEXT: movne r3, lr
; CHECK-NEXT: movne r2, r4
; CHECK-NEXT: vmov.32 d17[0], r2
; CHECK-NEXT: vmov.32 d17[1], r3
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov r0, r12, d0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: subs r5, r2, r6
; CHECK-NEXT: sbcs r5, r3, r8
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: movne r7, r6
; CHECK-NEXT: vmov.32 d18[0], r7
; CHECK-NEXT: subs r7, r1, r4
; CHECK-NEXT: sbcs r7, r0, lr
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r1, r4
; CHECK-NEXT: vmov.32 d19[0], r1
; CHECK-NEXT: subs r1, r6, r2
; CHECK-NEXT: sbcs r1, r8, r3
; CHECK-NEXT: movwlt r12, #1
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: movne r2, r6
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: vmov.32 d16[0], r2
; CHECK-NEXT: movne r1, r8
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r0, lr
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d16[1], r1
; CHECK-NEXT: movne r3, r8
; CHECK-NEXT: vmov.32 d19[1], r0
; CHECK-NEXT: vmov.32 d18[1], r3
; CHECK-NEXT: vmov r1, lr, d1
; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r3, r12
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: subs r1, r4, r1
; CHECK-NEXT: sbcs r1, r5, lr
; CHECK-NEXT: movwlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vdup.32 d19, r6
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: veor q8, q8, q9
; CHECK-NEXT: vsub.i64 q0, q9, q8
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: pop {r4, r5, r6, pc}
%a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
%b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
%sub = sub <2 x i64> %a, %b
@ -594,11 +539,9 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vqsub.u64 q8, q0, q1
; CHECK-NEXT: vqsub.u64 q9, q1, q0
; CHECK-NEXT: vsub.i64 q8, q8, q0
; CHECK-NEXT: vadd.i64 q9, q0, q9
; CHECK-NEXT: vadd.i64 q0, q9, q8
; CHECK-NEXT: vqsub.u64 q8, q1, q0
; CHECK-NEXT: vqsub.u64 q9, q0, q1
; CHECK-NEXT: vorr q0, q9, q8
; CHECK-NEXT: bx lr
%a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
%b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)

View File

@ -172,82 +172,13 @@ entry:
ret <16 x i8> %3
}
; FIXME: This does not produce the ISD::ABS that we are looking for.
; We should fix the missing canonicalization.
; We do manage to find the word version of ABS but not the halfword.
; Threfore, we end up doing more work than is required with a pair of abs for word
; instead of just one for the halfword.
define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_16_ext:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vmrghh v4, v2, v2
; CHECK-PWR9-NEXT: vmrglh v2, v2, v2
; CHECK-PWR9-NEXT: vmrghh v5, v3, v3
; CHECK-PWR9-NEXT: vmrglh v3, v3, v3
; CHECK-PWR9-NEXT: vextsh2w v2, v2
; CHECK-PWR9-NEXT: vextsh2w v3, v3
; CHECK-PWR9-NEXT: vextsh2w v4, v4
; CHECK-PWR9-NEXT: vextsh2w v5, v5
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: xvnegsp v4, v4
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: xvnegsp v3, v5
; CHECK-PWR9-NEXT: vabsduw v3, v4, v3
; CHECK-PWR9-NEXT: vpkuwum v2, v3, v2
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR8-LABEL: sub_absv_16_ext:
; CHECK-PWR8: # %bb.0: # %entry
; CHECK-PWR8-NEXT: vspltisw v4, 8
; CHECK-PWR8-NEXT: vmrglh v5, v2, v2
; CHECK-PWR8-NEXT: vadduwm v4, v4, v4
; CHECK-PWR8-NEXT: vmrghh v2, v2, v2
; CHECK-PWR8-NEXT: vmrglh v0, v3, v3
; CHECK-PWR8-NEXT: vmrghh v3, v3, v3
; CHECK-PWR8-NEXT: vslw v5, v5, v4
; CHECK-PWR8-NEXT: vslw v2, v2, v4
; CHECK-PWR8-NEXT: vslw v0, v0, v4
; CHECK-PWR8-NEXT: vslw v3, v3, v4
; CHECK-PWR8-NEXT: vsraw v5, v5, v4
; CHECK-PWR8-NEXT: vsraw v2, v2, v4
; CHECK-PWR8-NEXT: vsraw v0, v0, v4
; CHECK-PWR8-NEXT: vsraw v3, v3, v4
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR8-NEXT: vsubuwm v3, v5, v0
; CHECK-PWR8-NEXT: vsubuwm v5, v4, v3
; CHECK-PWR8-NEXT: vsubuwm v4, v4, v2
; CHECK-PWR8-NEXT: vmaxsw v3, v3, v5
; CHECK-PWR8-NEXT: vmaxsw v2, v2, v4
; CHECK-PWR8-NEXT: vpkuwum v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: sub_absv_16_ext:
; CHECK-PWR7: # %bb.0: # %entry
; CHECK-PWR7-NEXT: vspltisw v4, 8
; CHECK-PWR7-NEXT: vmrglh v5, v2, v2
; CHECK-PWR7-NEXT: vmrghh v2, v2, v2
; CHECK-PWR7-NEXT: vmrglh v0, v3, v3
; CHECK-PWR7-NEXT: vmrghh v3, v3, v3
; CHECK-PWR7-NEXT: vadduwm v4, v4, v4
; CHECK-PWR7-NEXT: vslw v5, v5, v4
; CHECK-PWR7-NEXT: vslw v2, v2, v4
; CHECK-PWR7-NEXT: vslw v0, v0, v4
; CHECK-PWR7-NEXT: vslw v3, v3, v4
; CHECK-PWR7-NEXT: vsraw v5, v5, v4
; CHECK-PWR7-NEXT: vsraw v2, v2, v4
; CHECK-PWR7-NEXT: vsraw v0, v0, v4
; CHECK-PWR7-NEXT: vsraw v3, v3, v4
; CHECK-PWR7-NEXT: xxlxor v4, v4, v4
; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR7-NEXT: vsubuwm v3, v5, v0
; CHECK-PWR7-NEXT: vsubuwm v5, v4, v3
; CHECK-PWR7-NEXT: vsubuwm v4, v4, v2
; CHECK-PWR7-NEXT: vmaxsw v3, v3, v5
; CHECK-PWR7-NEXT: vmaxsw v2, v2, v4
; CHECK-PWR7-NEXT: vpkuwum v2, v2, v3
; CHECK-PWR7-NEXT: blr
; CHECK-LABEL: sub_absv_16_ext:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
entry:
%0 = sext <8 x i16> %a to <8 x i32>
%1 = sext <8 x i16> %b to <8 x i32>
@ -1240,18 +1171,16 @@ entry:
define <4 x i32> @zext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: zext_sub_absd32:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-LE-NEXT: vmrglh v2, v4, v2
; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3
; CHECK-PWR9-LE-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-LE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: zext_sub_absd32:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-BE-NEXT: vmrghh v2, v4, v2
; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3
; CHECK-PWR9-BE-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-BE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: zext_sub_absd32:
@ -1287,18 +1216,16 @@ define <4 x i32> @zext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
define <8 x i16> @zext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: zext_sub_absd16:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-LE-NEXT: vmrglb v2, v4, v2
; CHECK-PWR9-LE-NEXT: vmrglb v3, v4, v3
; CHECK-PWR9-LE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-LE-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vmrglb v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: zext_sub_absd16:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-BE-NEXT: vmrghb v2, v4, v2
; CHECK-PWR9-BE-NEXT: vmrghb v3, v4, v3
; CHECK-PWR9-BE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-BE-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vmrghb v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: zext_sub_absd16:
@ -1335,8 +1262,8 @@ define <16 x i8> @zext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
; CHECK-PWR9-LABEL: zext_sub_absd8:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xxspltib vs0, 15
; CHECK-PWR9-NEXT: xxland v2, v2, vs0
; CHECK-PWR9-NEXT: xxland v3, v3, vs0
; CHECK-PWR9-NEXT: xxland v2, v2, vs0
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
@ -1361,24 +1288,20 @@ define <16 x i8> @zext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
define <4 x i32> @sext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: sext_sub_absd32:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: vmrglh v2, v2, v2
; CHECK-PWR9-LE-NEXT: vmrglh v3, v3, v3
; CHECK-PWR9-LE-NEXT: vextsh2w v2, v2
; CHECK-PWR9-LE-NEXT: vextsh2w v3, v3
; CHECK-PWR9-LE-NEXT: xvnegsp v3, v3
; CHECK-PWR9-LE-NEXT: xvnegsp v2, v2
; CHECK-PWR9-LE-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-LE-NEXT: vminsh v4, v2, v3
; CHECK-PWR9-LE-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: sext_sub_absd32:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: vmrghh v2, v2, v2
; CHECK-PWR9-BE-NEXT: vmrghh v3, v3, v3
; CHECK-PWR9-BE-NEXT: vextsh2w v2, v2
; CHECK-PWR9-BE-NEXT: vextsh2w v3, v3
; CHECK-PWR9-BE-NEXT: xvnegsp v3, v3
; CHECK-PWR9-BE-NEXT: xvnegsp v2, v2
; CHECK-PWR9-BE-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-BE-NEXT: vminsh v4, v2, v3
; CHECK-PWR9-BE-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: sext_sub_absd32:
@ -1423,32 +1346,20 @@ define <4 x i32> @sext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
define <8 x i16> @sext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: sext_sub_absd16:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: vmrglb v2, v2, v2
; CHECK-PWR9-LE-NEXT: vspltish v4, 8
; CHECK-PWR9-LE-NEXT: vmrglb v3, v3, v3
; CHECK-PWR9-LE-NEXT: vslh v2, v2, v4
; CHECK-PWR9-LE-NEXT: vslh v3, v3, v4
; CHECK-PWR9-LE-NEXT: vsrah v2, v2, v4
; CHECK-PWR9-LE-NEXT: vsrah v3, v3, v4
; CHECK-PWR9-LE-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR9-LE-NEXT: vminsb v4, v2, v3
; CHECK-PWR9-LE-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vsubuhm v3, v3, v2
; CHECK-PWR9-LE-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-LE-NEXT: vsububm v2, v2, v4
; CHECK-PWR9-LE-NEXT: vmrglb v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: sext_sub_absd16:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: vmrghb v2, v2, v2
; CHECK-PWR9-BE-NEXT: vspltish v4, 8
; CHECK-PWR9-BE-NEXT: vmrghb v3, v3, v3
; CHECK-PWR9-BE-NEXT: vslh v2, v2, v4
; CHECK-PWR9-BE-NEXT: vslh v3, v3, v4
; CHECK-PWR9-BE-NEXT: vsrah v2, v2, v4
; CHECK-PWR9-BE-NEXT: vsrah v3, v3, v4
; CHECK-PWR9-BE-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR9-BE-NEXT: vminsb v4, v2, v3
; CHECK-PWR9-BE-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vsubuhm v3, v3, v2
; CHECK-PWR9-BE-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-BE-NEXT: vsububm v2, v2, v4
; CHECK-PWR9-BE-NEXT: vmrghb v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: sext_sub_absd16:
@ -1492,14 +1403,13 @@ define <16 x i8> @sext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
; CHECK-PWR9-LABEL: sext_sub_absd8:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xxspltib v4, 4
; CHECK-PWR9-NEXT: vslb v2, v2, v4
; CHECK-PWR9-NEXT: vslb v3, v3, v4
; CHECK-PWR9-NEXT: vsrab v2, v2, v4
; CHECK-PWR9-NEXT: vslb v2, v2, v4
; CHECK-PWR9-NEXT: vsrab v3, v3, v4
; CHECK-PWR9-NEXT: vsububm v2, v2, v3
; CHECK-PWR9-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-NEXT: vsububm v3, v3, v2
; CHECK-PWR9-NEXT: vsrab v2, v2, v4
; CHECK-PWR9-NEXT: vminsb v4, v2, v3
; CHECK-PWR9-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-NEXT: vsububm v2, v2, v4
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sext_sub_absd8:
@ -1532,10 +1442,9 @@ define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1552,11 +1461,9 @@ define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v3, v2
; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v4, vs0
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp uge <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1573,10 +1480,9 @@ define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v3, v2
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ult <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1593,11 +1499,9 @@ define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v4, v2, vs0
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ule <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1614,10 +1518,9 @@ define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) {
;
; CHECK-PWR78-LABEL: absd_int16_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuh v4, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1634,11 +1537,9 @@ define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) {
;
; CHECK-PWR78-LABEL: absd_int16_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuh v4, v3, v2
; CHECK-PWR78-NEXT: vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT: xxlnor v4, v4, v4
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp uge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1655,10 +1556,9 @@ define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) {
;
; CHECK-PWR78-LABEL: absd_int16_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuh v4, v3, v2
; CHECK-PWR78-NEXT: vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ult <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1675,11 +1575,9 @@ define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) {
;
; CHECK-PWR78-LABEL: absd_int16_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuh v4, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT: xxlnor v4, v4, v4
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ule <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1696,10 +1594,9 @@ define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) {
;
; CHECK-PWR78-LABEL: absd_int8_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtub v4, v2, v3
; CHECK-PWR78-NEXT: vsububm v5, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1716,11 +1613,9 @@ define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) {
;
; CHECK-PWR78-LABEL: absd_int8_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtub v4, v3, v2
; CHECK-PWR78-NEXT: vsububm v5, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v3, v2
; CHECK-PWR78-NEXT: xxlnor v4, v4, v4
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp uge <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1737,10 +1632,9 @@ define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) {
;
; CHECK-PWR78-LABEL: absd_int8_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtub v4, v3, v2
; CHECK-PWR78-NEXT: vsububm v5, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ult <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1757,11 +1651,9 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
;
; CHECK-PWR78-LABEL: absd_int8_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtub v4, v2, v3
; CHECK-PWR78-NEXT: vsububm v5, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v3, v2
; CHECK-PWR78-NEXT: xxlnor v4, v4, v4
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ule <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1782,10 +1674,9 @@ define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_sgt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtsw v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp sgt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1804,11 +1695,9 @@ define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_sge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtsw v4, v3, v2
; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v4, vs0
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp sge <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1827,10 +1716,9 @@ define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_slt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtsw v4, v3, v2
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp slt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1849,11 +1737,9 @@ define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) {
;
; CHECK-PWR78-LABEL: absd_int32_sle:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtsw v4, v2, v3
; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v4, v2, vs0
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp sle <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
@ -1865,10 +1751,9 @@ define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) {
define <8 x i16> @absd_int16_sgt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_sgt:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsh v4, v2, v3
; CHECK-NEXT: vsubuhm v5, v2, v3
; CHECK-NEXT: vsubuhm v2, v3, v2
; CHECK-NEXT: xxsel v2, v2, v5, v4
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sgt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1880,11 +1765,9 @@ define <8 x i16> @absd_int16_sgt(<8 x i16>, <8 x i16>) {
define <8 x i16> @absd_int16_sge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_sge:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsh v4, v3, v2
; CHECK-NEXT: vsubuhm v5, v2, v3
; CHECK-NEXT: vsubuhm v2, v3, v2
; CHECK-NEXT: xxlnor v4, v4, v4
; CHECK-NEXT: xxsel v2, v2, v5, v4
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1896,10 +1779,9 @@ define <8 x i16> @absd_int16_sge(<8 x i16>, <8 x i16>) {
define <8 x i16> @absd_int16_slt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_slt:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsh v4, v3, v2
; CHECK-NEXT: vsubuhm v5, v2, v3
; CHECK-NEXT: vsubuhm v2, v3, v2
; CHECK-NEXT: xxsel v2, v5, v2, v4
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp slt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1911,11 +1793,9 @@ define <8 x i16> @absd_int16_slt(<8 x i16>, <8 x i16>) {
define <8 x i16> @absd_int16_sle(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_sle:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsh v4, v2, v3
; CHECK-NEXT: vsubuhm v5, v2, v3
; CHECK-NEXT: vsubuhm v2, v3, v2
; CHECK-NEXT: xxlnor v4, v4, v4
; CHECK-NEXT: xxsel v2, v5, v2, v4
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sle <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
@ -1927,10 +1807,9 @@ define <8 x i16> @absd_int16_sle(<8 x i16>, <8 x i16>) {
define <16 x i8> @absd_int8_sgt(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_sgt:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsb v4, v2, v3
; CHECK-NEXT: vsububm v5, v2, v3
; CHECK-NEXT: vsububm v2, v3, v2
; CHECK-NEXT: xxsel v2, v2, v5, v4
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sgt <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1942,11 +1821,9 @@ define <16 x i8> @absd_int8_sgt(<16 x i8>, <16 x i8>) {
define <16 x i8> @absd_int8_sge(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_sge:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsb v4, v3, v2
; CHECK-NEXT: vsububm v5, v2, v3
; CHECK-NEXT: vsububm v2, v3, v2
; CHECK-NEXT: xxlnor v4, v4, v4
; CHECK-NEXT: xxsel v2, v2, v5, v4
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sge <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1958,10 +1835,9 @@ define <16 x i8> @absd_int8_sge(<16 x i8>, <16 x i8>) {
define <16 x i8> @absd_int8_slt(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_slt:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsb v4, v3, v2
; CHECK-NEXT: vsububm v5, v2, v3
; CHECK-NEXT: vsububm v2, v3, v2
; CHECK-NEXT: xxsel v2, v5, v2, v4
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp slt <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -1973,11 +1849,9 @@ define <16 x i8> @absd_int8_slt(<16 x i8>, <16 x i8>) {
define <16 x i8> @absd_int8_sle(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_sle:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpgtsb v4, v2, v3
; CHECK-NEXT: vsububm v5, v2, v3
; CHECK-NEXT: vsububm v2, v3, v2
; CHECK-NEXT: xxlnor v4, v4, v4
; CHECK-NEXT: xxsel v2, v5, v2, v4
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sle <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
@ -2006,53 +1880,55 @@ define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
; CHECK-PWR9-LABEL: absd_int64_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vcmpgtud v4, v2, v3
; CHECK-PWR9-NEXT: vsubudm v5, v2, v3
; CHECK-PWR9-NEXT: vsubudm v2, v3, v2
; CHECK-PWR9-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR9-NEXT: vminud v4, v2, v3
; CHECK-PWR9-NEXT: vmaxud v2, v2, v3
; CHECK-PWR9-NEXT: vsubudm v2, v2, v4
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR8-LABEL: absd_int64_ugt:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: vcmpgtud v4, v2, v3
; CHECK-PWR8-NEXT: vsubudm v5, v2, v3
; CHECK-PWR8-NEXT: vsubudm v2, v3, v2
; CHECK-PWR8-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR8-NEXT: vminud v4, v2, v3
; CHECK-PWR8-NEXT: vmaxud v2, v2, v3
; CHECK-PWR8-NEXT: vsubudm v2, v2, v4
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: absd_int64_ugt:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: addi r3, r1, -64
; CHECK-PWR7-NEXT: addi r4, r1, -80
; CHECK-PWR7-NEXT: li r5, 0
; CHECK-PWR7-NEXT: li r6, -1
; CHECK-PWR7-NEXT: addi r3, r1, -96
; CHECK-PWR7-NEXT: stxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: addi r3, r1, -80
; CHECK-PWR7-NEXT: stxvd2x v3, 0, r3
; CHECK-PWR7-NEXT: stxvd2x v2, 0, r4
; CHECK-PWR7-NEXT: addi r9, r1, -16
; CHECK-PWR7-NEXT: ld r3, -56(r1)
; CHECK-PWR7-NEXT: ld r3, -88(r1)
; CHECK-PWR7-NEXT: ld r4, -72(r1)
; CHECK-PWR7-NEXT: ld r8, -80(r1)
; CHECK-PWR7-NEXT: cmpld r4, r3
; CHECK-PWR7-NEXT: iselgt r7, r6, r5
; CHECK-PWR7-NEXT: std r7, -8(r1)
; CHECK-PWR7-NEXT: ld r7, -64(r1)
; CHECK-PWR7-NEXT: cmpld r8, r7
; CHECK-PWR7-NEXT: iselgt r5, r6, r5
; CHECK-PWR7-NEXT: std r5, -16(r1)
; CHECK-PWR7-NEXT: sub r5, r4, r3
; CHECK-PWR7-NEXT: ld r6, -80(r1)
; CHECK-PWR7-NEXT: sub r5, r3, r4
; CHECK-PWR7-NEXT: cmpld r3, r4
; CHECK-PWR7-NEXT: li r3, 0
; CHECK-PWR7-NEXT: li r4, -1
; CHECK-PWR7-NEXT: std r5, -56(r1)
; CHECK-PWR7-NEXT: ld r5, -96(r1)
; CHECK-PWR7-NEXT: sub r7, r5, r6
; CHECK-PWR7-NEXT: std r7, -64(r1)
; CHECK-PWR7-NEXT: iselgt r7, r4, r3
; CHECK-PWR7-NEXT: cmpld r5, r6
; CHECK-PWR7-NEXT: std r7, -40(r1)
; CHECK-PWR7-NEXT: iselgt r3, r4, r3
; CHECK-PWR7-NEXT: addi r4, r1, -64
; CHECK-PWR7-NEXT: std r3, -48(r1)
; CHECK-PWR7-NEXT: lxvw4x vs0, 0, r4
; CHECK-PWR7-NEXT: addi r4, r1, -48
; CHECK-PWR7-NEXT: lxvw4x vs1, 0, r4
; CHECK-PWR7-NEXT: addi r4, r1, -32
; CHECK-PWR7-NEXT: xxlxor vs0, vs0, vs1
; CHECK-PWR7-NEXT: stxvw4x vs0, 0, r4
; CHECK-PWR7-NEXT: ld r4, -24(r1)
; CHECK-PWR7-NEXT: sub r4, r7, r4
; CHECK-PWR7-NEXT: std r4, -8(r1)
; CHECK-PWR7-NEXT: ld r4, -32(r1)
; CHECK-PWR7-NEXT: sub r3, r3, r4
; CHECK-PWR7-NEXT: lxvd2x v2, 0, r9
; CHECK-PWR7-NEXT: std r5, -40(r1)
; CHECK-PWR7-NEXT: sub r5, r8, r7
; CHECK-PWR7-NEXT: std r5, -48(r1)
; CHECK-PWR7-NEXT: addi r5, r1, -48
; CHECK-PWR7-NEXT: lxvd2x v3, 0, r5
; CHECK-PWR7-NEXT: std r3, -24(r1)
; CHECK-PWR7-NEXT: sub r3, r7, r8
; CHECK-PWR7-NEXT: std r3, -32(r1)
; CHECK-PWR7-NEXT: addi r3, r1, -32
; CHECK-PWR7-NEXT: lxvd2x v4, 0, r3
; CHECK-PWR7-NEXT: xxsel v2, v4, v3, v2
; CHECK-PWR7-NEXT: std r3, -16(r1)
; CHECK-PWR7-NEXT: addi r3, r1, -16
; CHECK-PWR7-NEXT: lxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: blr
%3 = icmp ugt <2 x i64> %0, %1
%4 = sub <2 x i64> %0, %1

View File

@ -1,31 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -mcpu=pwr9 < %s | FileCheck %s
; Widen to <16 x i8>
define <12 x i8> @zext_abdu(<12 x i8> %a, <12 x i8> %b) {
; CHECK-LABEL: zext_abdu:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: lxv 37, 0(3)
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
; CHECK-NEXT: lxv 33, 0(3)
; CHECK-NEXT: addis 3, 2, .LCPI0_2@toc@ha
; CHECK-NEXT: vperm 0, 4, 2, 5
; CHECK-NEXT: vperm 5, 4, 3, 5
; CHECK-NEXT: addi 3, 3, .LCPI0_2@toc@l
; CHECK-NEXT: lxv 39, 0(3)
; CHECK-NEXT: vperm 6, 4, 2, 1
; CHECK-NEXT: vperm 1, 4, 3, 1
; CHECK-NEXT: vperm 2, 4, 2, 7
; CHECK-NEXT: vperm 3, 4, 3, 7
; CHECK-NEXT: vabsduw 4, 5, 0
; CHECK-NEXT: vabsduw 2, 3, 2
; CHECK-NEXT: vabsduw 3, 1, 6
; CHECK-NEXT: vpkuwum 3, 4, 3
; CHECK-NEXT: vpkuwum 2, 2, 2
; CHECK-NEXT: vpkuhum 2, 2, 3
; CHECK-NEXT: vabsdub 2, 2, 3
; CHECK-NEXT: blr
entry:
%aa = zext <12 x i8> %a to <12 x i32>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -26,13 +26,9 @@ define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
; CHECK-LABEL: sabd_b_promoted_ops:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmerge.vim v12, v10, -1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
; CHECK-NEXT: vmin.vv v10, v12, v8
; CHECK-NEXT: vmax.vv v8, v12, v8
; CHECK-NEXT: vsub.vv v8, v8, v10
; CHECK-NEXT: vmxor.mm v0, v0, v8
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
%b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@ -158,13 +154,9 @@ define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
; CHECK-LABEL: uabd_b_promoted_ops:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
; CHECK-NEXT: vminu.vv v10, v12, v8
; CHECK-NEXT: vmaxu.vv v8, v12, v8
; CHECK-NEXT: vsub.vv v8, v8, v10
; CHECK-NEXT: vmxor.mm v0, v0, v8
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
%b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>

View File

@ -9,13 +9,13 @@ define void @vp8_rd_pick_inter_mode() {
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: ldr r2, [r4]
; CHECK-NEXT: movs r1, #180
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: movs r1, #188
; CHECK-NEXT: ldr r1, [r4]
; CHECK-NEXT: movs r2, #180
; CHECK-NEXT: str r0, [r2]
; CHECK-NEXT: movs r2, #188
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: sxth r1, r2
; CHECK-NEXT: str r1, [r2]
; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: subs r0, r0, r1
; CHECK-NEXT: it mi
; CHECK-NEXT: rsbmi r0, r0, #0

View File

@ -171,8 +171,7 @@ define arm_aapcs_vfpcc <4 x i8> @vabd_v4u8(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-NEXT: vmov.i32 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vsub.i32 q0, q0, q1
; CHECK-NEXT: vabs.s32 q0, q0
; CHECK-NEXT: vabd.u32 q0, q0, q1
; CHECK-NEXT: bx lr
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>

View File

@ -305,30 +305,27 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_ext_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: xorl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: sbbl %ecx, %ebx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: cmovll %ebx, %ecx
; X86-NEXT: cmovll %edi, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i64:
@ -352,30 +349,27 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_ext_i64_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: xorl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: sbbl %ecx, %ebx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: cmovll %ebx, %ecx
; X86-NEXT: cmovll %edi, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i64_undef:
@ -403,47 +397,41 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl %edi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl %ebp, %eax
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: xorl %edi, %eax
; X86-NEXT: xorl %edi, %edx
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: xorl %edi, %ecx
; X86-NEXT: subl %edi, %ecx
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovll %eax, %esi
; X86-NEXT: cmovll %ebx, %edi
; X86-NEXT: cmovll %ebp, %edx
; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: negl %ecx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl $0, %edx
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %ebp, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ebx, 12(%eax)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -453,23 +441,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: abd_ext_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rsi, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: movq %rcx, %r8
; X64-NEXT: sarq $63, %r8
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: sbbq %r8, %rcx
; X64-NEXT: sbbq %r8, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: sbbq %rdi, %rsi
; X64-NEXT: movq %rsi, %r8
; X64-NEXT: sbbq %rcx, %r8
; X64-NEXT: subq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: cmovlq %r8, %rcx
; X64-NEXT: cmovgeq %rdx, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: negq %rax
; X64-NEXT: sbbq %rsi, %rdx
; X64-NEXT: sbbq %rcx, %rdx
; X64-NEXT: retq
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256
@ -487,47 +468,41 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl %edi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl %ebp, %eax
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: xorl %edi, %eax
; X86-NEXT: xorl %edi, %edx
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: xorl %edi, %ecx
; X86-NEXT: subl %edi, %ecx
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovll %eax, %esi
; X86-NEXT: cmovll %ebx, %edi
; X86-NEXT: cmovll %ebp, %edx
; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: negl %ecx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl $0, %edx
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %ebp, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ebx, 12(%eax)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -537,23 +512,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X64-LABEL: abd_ext_i128_undef:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rsi, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: movq %rcx, %r8
; X64-NEXT: sarq $63, %r8
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: sbbq %r8, %rcx
; X64-NEXT: sbbq %r8, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: sbbq %rdi, %rsi
; X64-NEXT: movq %rsi, %r8
; X64-NEXT: sbbq %rcx, %r8
; X64-NEXT: subq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: cmovlq %r8, %rcx
; X64-NEXT: cmovgeq %rdx, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: negq %rax
; X64-NEXT: sbbq %rsi, %rdx
; X64-NEXT: sbbq %rcx, %rdx
; X64-NEXT: retq
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256

View File

@ -282,27 +282,24 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_ext_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: cmovll %edi, %eax
; X86-NEXT: cmovll %ebx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i64:
@ -324,27 +321,24 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_ext_i64_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: cmovll %edi, %eax
; X86-NEXT: cmovll %ebx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i64_undef:
@ -370,40 +364,34 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: sbbl %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %eax, %esi
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: sbbl %eax, %esi
; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
; X86-NEXT: cmovll %ebx, %esi
; X86-NEXT: cmovll %ebp, %ecx
; X86-NEXT: cmovll %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -413,21 +401,14 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: abd_ext_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rsi, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: movq %rcx, %r8
; X64-NEXT: sarq $63, %r8
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: sbbq %r8, %rcx
; X64-NEXT: sbbq %r8, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: sbbq %rdi, %rsi
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: movq %rsi, %r8
; X64-NEXT: sbbq %rcx, %r8
; X64-NEXT: subq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: cmovgeq %rdx, %rax
; X64-NEXT: cmovgeq %rcx, %r8
; X64-NEXT: movq %r8, %rdx
; X64-NEXT: retq
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256
@ -444,40 +425,34 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: sbbl %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %eax, %esi
; X86-NEXT: xorl %eax, %edi
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: sbbl %eax, %esi
; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
; X86-NEXT: cmovll %ebx, %esi
; X86-NEXT: cmovll %ebp, %ecx
; X86-NEXT: cmovll %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -487,21 +462,14 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X64-LABEL: abd_ext_i128_undef:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rsi, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: movq %rcx, %r8
; X64-NEXT: sarq $63, %r8
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: sbbq %r8, %rcx
; X64-NEXT: sbbq %r8, %rdi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: sbbq %rdi, %rsi
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: movq %rsi, %r8
; X64-NEXT: sbbq %rcx, %r8
; X64-NEXT: subq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: cmovgeq %rdx, %rax
; X64-NEXT: cmovgeq %rcx, %r8
; X64-NEXT: movq %r8, %rdx
; X64-NEXT: retq
%aext = sext i128 %a to i256
%bext = sext i128 %b to i256
@ -602,7 +570,6 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_minmax_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
@ -610,24 +577,17 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: cmovll %esi, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: cmovll %ecx, %ebx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: cmovll %esi, %edx
; X86-NEXT: cmovll %ecx, %eax
; X86-NEXT: subl %ebx, %eax
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: subl %eax, %edi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: cmovll %edi, %eax
; X86-NEXT: cmovll %ebx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i64:
@ -651,53 +611,34 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: movl %edi, %eax
; X86-NEXT: cmovll %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cmovll %esi, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmovll %eax, %ebp
; X86-NEXT: movl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovll %esi, %eax
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: sbbl %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
; X86-NEXT: cmovll %ebx, %esi
; X86-NEXT: cmovll %ebp, %ecx
; X86-NEXT: cmovll %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: addl $8, %esp
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -706,22 +647,15 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
;
; X64-LABEL: abd_minmax_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: sbbq %rcx, %rdx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: cmovlq %rsi, %rdx
; X64-NEXT: movq %rax, %r8
; X64-NEXT: cmovlq %rdi, %r8
; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: movq %rcx, %r9
; X64-NEXT: sbbq %rsi, %r9
; X64-NEXT: cmovlq %rsi, %rcx
; X64-NEXT: cmovlq %rdi, %rax
; X64-NEXT: subq %r8, %rax
; X64-NEXT: sbbq %rdx, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: movq %rsi, %r8
; X64-NEXT: sbbq %rcx, %r8
; X64-NEXT: subq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: cmovgeq %rdx, %rax
; X64-NEXT: cmovgeq %rcx, %r8
; X64-NEXT: movq %r8, %rdx
; X64-NEXT: retq
%min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
%max = call i128 @llvm.smax.i128(i128 %a, i128 %b)

View File

@ -311,8 +311,6 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: xorl %esi, %ecx
; X86-NEXT: xorl %esi, %eax
; X86-NEXT: subl %esi, %eax
@ -351,8 +349,6 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: xorl %esi, %ecx
; X86-NEXT: xorl %esi, %eax
; X86-NEXT: subl %esi, %eax
@ -387,42 +383,36 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: xorl %ebp, %edx
; X86-NEXT: xorl %ebp, %edi
; X86-NEXT: xorl %ebp, %ebx
; X86-NEXT: xorl %ebp, %ecx
; X86-NEXT: xorl %ebp, %esi
; X86-NEXT: subl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebx
; X86-NEXT: xorl %ebp, %edx
; X86-NEXT: subl %ebp, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: sbbl %ebp, %edx
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: sbbl %ebp, %ecx
; X86-NEXT: negl %edx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %edi, %ebx
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ebp, 4(%eax)
; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -437,8 +427,6 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movl $0, %ecx
; X64-NEXT: sbbq %rcx, %rcx
; X64-NEXT: movl $0, %ecx
; X64-NEXT: sbbq %rcx, %rcx
; X64-NEXT: xorq %rcx, %rsi
; X64-NEXT: xorq %rcx, %rax
; X64-NEXT: subq %rcx, %rax
@ -463,42 +451,36 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: xorl %ebp, %edx
; X86-NEXT: xorl %ebp, %edi
; X86-NEXT: xorl %ebp, %ebx
; X86-NEXT: xorl %ebp, %ecx
; X86-NEXT: xorl %ebp, %esi
; X86-NEXT: subl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebx
; X86-NEXT: xorl %ebp, %edx
; X86-NEXT: subl %ebp, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: sbbl %ebp, %edx
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: sbbl %ebp, %ecx
; X86-NEXT: negl %edx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebx, %ebp
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %edi, %ebx
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ebp, 4(%eax)
; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -513,8 +495,6 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movl $0, %ecx
; X64-NEXT: sbbq %rcx, %rcx
; X64-NEXT: movl $0, %ecx
; X64-NEXT: sbbq %rcx, %rcx
; X64-NEXT: xorq %rcx, %rsi
; X64-NEXT: xorq %rcx, %rax
; X64-NEXT: subq %rcx, %rax

View File

@ -280,20 +280,16 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_ext_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i64:
@ -315,20 +311,16 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_ext_i64_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i64_undef:
@ -350,26 +342,19 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: xorl %ebx, %ecx
; X86-NEXT: xorl %ebx, %edx
@ -386,7 +371,6 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_ext_i128:
@ -395,8 +379,6 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movl $0, %ecx
; X64-NEXT: sbbq %rcx, %rcx
; X64-NEXT: sbbq %rdi, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
@ -415,26 +397,19 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: xorl %ebx, %ecx
; X86-NEXT: xorl %ebx, %edx
@ -451,7 +426,6 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_ext_i128_undef:
@ -460,8 +434,6 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: movl $0, %ecx
; X64-NEXT: sbbq %rcx, %rcx
; X64-NEXT: sbbq %rdi, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
@ -568,32 +540,16 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_minmax_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: cmovbl %esi, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: cmovbl %ecx, %ebx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: cmovbl %esi, %edx
; X86-NEXT: cmovbl %ecx, %eax
; X86-NEXT: subl %ebx, %eax
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i64:
@ -613,81 +569,49 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: movl %edi, %eax
; X86-NEXT: cmovbl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cmovbl %esi, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmovbl %eax, %ebp
; X86-NEXT: movl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovbl %esi, %eax
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: addl $8, %esp
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: xorl %ebx, %ecx
; X86-NEXT: xorl %ebx, %edx
; X86-NEXT: xorl %ebx, %esi
; X86-NEXT: xorl %ebx, %edi
; X86-NEXT: subl %ebx, %edi
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_minmax_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: sbbq %rdi, %rdi
; X64-NEXT: xorq %rdi, %rsi
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: sbbq %rdi, %rsi
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: sbbq %rcx, %rdx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: movq %rax, %r8
; X64-NEXT: cmovbq %rdi, %r8
; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: movq %rcx, %r9
; X64-NEXT: sbbq %rsi, %r9
; X64-NEXT: cmovbq %rsi, %rcx
; X64-NEXT: cmovbq %rdi, %rax
; X64-NEXT: subq %r8, %rax
; X64-NEXT: sbbq %rdx, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: retq
%min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
%max = call i128 @llvm.umax.i128(i128 %a, i128 %b)