[AArch64] Add @llvm.experimental.vector.match (#101974)
This patch introduces an experimental intrinsic for matching the elements of one vector against the elements of another. For AArch64 targets that support SVE2, the intrinsic lowers to a MATCH instruction for supported fixed and scalable vector types.
This commit is contained in:
parent debfd7b0b4
commit e52238b59f
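As a quick orientation before the diff, here is a minimal usage sketch (the function name is hypothetical, not part of the patch): with +sve2 in non-streaming mode a call like this selects the MATCH instruction directly, as the match_nxv16i8_v16i8 test further down shows, while unsupported type/size combinations fall back to a generic per-element expansion.

; Minimal usage sketch. %op1 holds the data being searched, %op2 the needles,
; %mask the active lanes of %op1.
define <vscale x 16 x i1> @contains_any(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) {
  %r = call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}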
@@ -20091,6 +20091,44 @@ are undefined.
      }

'``llvm.experimental.vector.match.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

This is an overloaded intrinsic.

::

      declare <<n> x i1> @llvm.experimental.vector.match(<<n> x <ty>> %op1, <<m> x <ty>> %op2, <<n> x i1> %mask)
      declare <vscale x <n> x i1> @llvm.experimental.vector.match(<vscale x <n> x <ty>> %op1, <<m> x <ty>> %op2, <vscale x <n> x i1> %mask)

Overview:
"""""""""

Find active elements of the first argument matching any elements of the second.

Arguments:
""""""""""

The first argument is the search vector, the second argument the vector of
elements we are searching for (i.e. for which we consider a match successful),
and the third argument is a mask that controls which elements of the first
argument are active. The first two arguments must be vectors of matching
integer element types. The first and third arguments and the result type must
have matching element counts (fixed or scalable). The second argument must be a
fixed vector, but its length may be different from the remaining arguments.

Semantics:
""""""""""

The '``llvm.experimental.vector.match``' intrinsic compares each active element
in the first argument against the elements of the second argument, placing
``1`` in the corresponding element of the output vector if any equality
comparison is successful, and ``0`` otherwise. Inactive elements in the mask
are set to ``0`` in the output.
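As an illustrative sketch of these semantics (an editorial example, not part of the committed LangRef text), consider a small fixed-width call where one lane that would match is masked off:

; Search <8, 3, 5, 9> for the needles <5, 8>, with lane 2 inactive.
; Lane 0: 8 matches needle 8           -> 1
; Lane 1: 3 matches nothing            -> 0
; Lane 2: 5 would match, but is masked -> 0
; Lane 3: 9 matches nothing            -> 0
define <4 x i1> @example_match() {
  %r = call <4 x i1> @llvm.experimental.vector.match(<4 x i8> <i8 8, i8 3, i8 5, i8 9>, <2 x i8> <i8 5, i8 8>, <4 x i1> <i1 1, i1 1, i1 0, i1 1>)
  ; %r == <i1 1, i1 0, i1 0, i1 0>
  ret <4 x i1> %r
}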
Matrix Intrinsics
-----------------
@@ -483,6 +483,13 @@ public:
                                  bool ZeroIsPoison,
                                  const ConstantRange *VScaleRange) const;

  /// Return true if the @llvm.experimental.vector.match intrinsic should be
  /// expanded for vector type `VT' and search size `SearchSize' using generic
  /// code in SelectionDAGBuilder.
  virtual bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const {
    return true;
  }

  // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
  // vecreduce(op(x, y)) for the reduction opcode RedOpc.
  virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
@@ -1920,6 +1920,14 @@ def int_experimental_vector_histogram_add : DefaultAttrsIntrinsic<[],
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
                             [ IntrArgMemOnly ]>;

// Experimental match
def int_experimental_vector_match : DefaultAttrsIntrinsic<
                             [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
                             [ llvm_anyvector_ty,
                               llvm_anyvector_ty,
                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], // Mask
                             [ IntrNoMem, IntrNoSync, IntrWillReturn ]>;

// Operators
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
  // Integer arithmetic
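Because both vector operands are llvm_anyvector_ty, the intrinsic is overloaded on those two types. As a sketch only, the mangled declarations for two of the combinations exercised by the tests below would look roughly like this (the exact suffix spelling follows the usual overload-mangling convention and is shown here for illustration, not taken from the patch):

declare <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, <vscale x 16 x i1>)
declare <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)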
@@ -8175,6 +8175,36 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
             DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
    return;
  }
  case Intrinsic::experimental_vector_match: {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
    SDValue Mask = getValue(I.getOperand(2));
    EVT Op1VT = Op1.getValueType();
    EVT Op2VT = Op2.getValueType();
    EVT ResVT = Mask.getValueType();
    unsigned SearchSize = Op2VT.getVectorNumElements();

    // If the target has native support for this vector match operation, lower
    // the intrinsic untouched; otherwise, expand it below.
    if (!TLI.shouldExpandVectorMatch(Op1VT, SearchSize)) {
      visitTargetIntrinsic(I, Intrinsic);
      return;
    }

    SDValue Ret = DAG.getConstant(0, sdl, ResVT);

    for (unsigned i = 0; i < SearchSize; ++i) {
      SDValue Op2Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl,
                                    Op2VT.getVectorElementType(), Op2,
                                    DAG.getVectorIdxConstant(i, sdl));
      SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, sdl, Op1VT, Op2Elem);
      SDValue Cmp = DAG.getSetCC(sdl, ResVT, Op1, Splat, ISD::SETEQ);
      Ret = DAG.getNode(ISD::OR, sdl, ResVT, Ret, Cmp);
    }

    setValue(&I, DAG.getNode(ISD::AND, sdl, ResVT, Ret, Mask));
    return;
  }
  case Intrinsic::vector_reverse:
    visitVectorReverse(I);
    return;
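For targets that do not opt out via shouldExpandVectorMatch, the loop above effectively scalarises the needle vector: each element of the second operand is splatted, compared for equality against the first operand, the per-needle results are OR'ed together, and the mask is applied last. The following is a rough IR-level equivalent of that expansion for a hypothetical <4 x i8> search against two needles; the value names are illustrative:

define <4 x i1> @expanded_match(<4 x i8> %op1, <2 x i8> %op2, <4 x i1> %mask) {
  ; Splat needle 0 and compare.
  %e0 = extractelement <2 x i8> %op2, i64 0
  %s0.ins = insertelement <4 x i8> poison, i8 %e0, i64 0
  %s0 = shufflevector <4 x i8> %s0.ins, <4 x i8> poison, <4 x i32> zeroinitializer
  %c0 = icmp eq <4 x i8> %op1, %s0
  ; Splat needle 1 and compare.
  %e1 = extractelement <2 x i8> %op2, i64 1
  %s1.ins = insertelement <4 x i8> poison, i8 %e1, i64 0
  %s1 = shufflevector <4 x i8> %s1.ins, <4 x i8> poison, <4 x i32> zeroinitializer
  %c1 = icmp eq <4 x i8> %op1, %s1
  ; OR the per-needle results, then apply the mask last.
  %any = or <4 x i1> %c0, %c1
  %r = and <4 x i1> %any, %mask
  ret <4 x i1> %r
}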
@@ -6150,6 +6150,31 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
          &Call);
    break;
  }
  case Intrinsic::experimental_vector_match: {
    Value *Op1 = Call.getArgOperand(0);
    Value *Op2 = Call.getArgOperand(1);
    Value *Mask = Call.getArgOperand(2);

    VectorType *Op1Ty = dyn_cast<VectorType>(Op1->getType());
    VectorType *Op2Ty = dyn_cast<VectorType>(Op2->getType());
    VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());

    Check(Op1Ty && Op2Ty && MaskTy, "Operands must be vectors.", &Call);
    Check(isa<FixedVectorType>(Op2Ty),
          "Second operand must be a fixed length vector.", &Call);
    Check(Op1Ty->getElementType()->isIntegerTy(),
          "First operand must be a vector of integers.", &Call);
    Check(Op1Ty->getElementType() == Op2Ty->getElementType(),
          "First two operands must have the same element type.", &Call);
    Check(Op1Ty->getElementCount() == MaskTy->getElementCount(),
          "First operand and mask must have the same number of elements.",
          &Call);
    Check(MaskTy->getElementType()->isIntegerTy(1),
          "Mask must be a vector of i1's.", &Call);
    Check(Call.getType() == MaskTy, "Return type must match the mask type.",
          &Call);
    break;
  }
  case Intrinsic::vector_insert: {
    Value *Vec = Call.getArgOperand(0);
    Value *SubVec = Call.getArgOperand(1);
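A sketch of how these rules play out on concrete signatures (the function name is illustrative; the rejected variants are shown as comments together with the verifier message they would trigger):

; Accepted: integer elements of the same width on both vector operands, a fixed
; second operand, and mask/result element counts that match the first operand.
define <vscale x 16 x i1> @verifier_ok(<vscale x 16 x i8> %a, <16 x i8> %b, <vscale x 16 x i1> %m) {
  %r = call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %a, <16 x i8> %b, <vscale x 16 x i1> %m)
  ret <vscale x 16 x i1> %r
}

; Rejected, "Second operand must be a fixed length vector.":
;   call ... @llvm.experimental.vector.match(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m)
; Rejected, "First two operands must have the same element type.":
;   call ... @llvm.experimental.vector.match(<16 x i8> %a, <8 x i16> %b, <16 x i1> %m)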
@@ -2059,6 +2059,19 @@ bool AArch64TargetLowering::shouldExpandCttzElements(EVT VT) const {
         VT != MVT::v4i1 && VT != MVT::v2i1;
}

bool AArch64TargetLowering::shouldExpandVectorMatch(EVT VT,
                                                    unsigned SearchSize) const {
  // MATCH is SVE2 and only available in non-streaming mode.
  if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())
    return true;
  // Furthermore, we can only use it for 8-bit or 16-bit elements.
  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
  return true;
}

void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
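In IR terms, and matching the tests further down, the hook keeps combinations like the first two calls below on the MATCH path, while wider element types or a 32-element needle vector fall back to the generic expansion. The wrapper function is only an illustrative grouping of those calls, not code from the patch:

define void @match_support_examples(<vscale x 16 x i8> %b8, <16 x i8> %n16x8, <vscale x 16 x i1> %m8,
                                    <vscale x 8 x i16> %h16, <8 x i16> %n8x16, <vscale x 8 x i1> %m16,
                                    <32 x i8> %n32x8) {
  ; Kept as the intrinsic and selected to MATCH (SVE2, non-streaming):
  %a = call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %b8, <16 x i8> %n16x8, <vscale x 16 x i1> %m8)
  %b = call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %h16, <8 x i16> %n8x16, <vscale x 8 x i1> %m16)
  ; SearchSize 32 is not accepted for nxv16i8, so this call is expanded into a
  ; chain of per-element compares (see match_nxv16i8_v32i8 below):
  %c = call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %b8, <32 x i8> %n32x8, <vscale x 16 x i1> %m8)
  ret void
}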
@@ -5780,6 +5793,72 @@ SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
                      DAG.getTargetConstant(ImmAddend, DL, MVT::i32)});
}

SDValue LowerVectorMatch(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  SDValue ID =
      DAG.getTargetConstant(Intrinsic::aarch64_sve_match, dl, MVT::i64);

  auto Op1 = Op.getOperand(1);
  auto Op2 = Op.getOperand(2);
  auto Mask = Op.getOperand(3);

  EVT Op1VT = Op1.getValueType();
  EVT Op2VT = Op2.getValueType();
  EVT ResVT = Op.getValueType();

  assert((Op1VT.getVectorElementType() == MVT::i8 ||
          Op1VT.getVectorElementType() == MVT::i16) &&
         "Expected 8-bit or 16-bit characters.");

  // Scalable vector type used to wrap operands.
  // A single container is enough for both operands because ultimately the
  // operands will have to be wrapped to the same type (nxv16i8 or nxv8i16).
  EVT OpContainerVT = Op1VT.isScalableVector()
                          ? Op1VT
                          : getContainerForFixedLengthVector(DAG, Op1VT);

  if (Op2VT.is128BitVector()) {
    // If Op2 is a full 128-bit vector, wrap it trivially in a scalable vector.
    Op2 = convertToScalableVector(DAG, OpContainerVT, Op2);
    // Further, if the result is scalable, broadcast Op2 to a full SVE register.
    if (ResVT.isScalableVector())
      Op2 = DAG.getNode(AArch64ISD::DUPLANE128, dl, OpContainerVT, Op2,
                        DAG.getTargetConstant(0, dl, MVT::i64));
  } else {
    // If Op2 is not a full 128-bit vector, we always need to broadcast it.
    unsigned Op2BitWidth = Op2VT.getFixedSizeInBits();
    MVT Op2IntVT = MVT::getIntegerVT(Op2BitWidth);
    EVT Op2PromotedVT = getPackedSVEVectorVT(Op2IntVT);
    Op2 = DAG.getBitcast(MVT::getVectorVT(Op2IntVT, 1), Op2);
    Op2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op2IntVT, Op2,
                      DAG.getConstant(0, dl, MVT::i64));
    Op2 = DAG.getSplatVector(Op2PromotedVT, dl, Op2);
    Op2 = DAG.getBitcast(OpContainerVT, Op2);
  }

  // If the result is scalable, we just need to carry out the MATCH.
  if (ResVT.isScalableVector())
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResVT, ID, Mask, Op1, Op2);

  // If the result is fixed, we can still use MATCH but we need to wrap the
  // first operand and the mask in scalable vectors before doing so.

  // Wrap the operands.
  Op1 = convertToScalableVector(DAG, OpContainerVT, Op1);
  Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, Op1VT, Mask);
  Mask = convertFixedMaskToScalableVector(Mask, DAG);

  // Carry out the match.
  SDValue Match = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Mask.getValueType(),
                              ID, Mask, Op1, Op2);

  // Extract and promote the match result (nxv16i1/nxv8i1) to ResVT
  // (v16i8/v8i8).
  Match = DAG.getNode(ISD::SIGN_EXTEND, dl, OpContainerVT, Match);
  Match = convertFromScalableVector(DAG, Op1VT, Match);
  return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Match);
}

SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);

@@ -6383,6 +6462,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        DAG.getNode(AArch64ISD::CTTZ_ELTS, dl, MVT::i64, CttzOp);
    return DAG.getZExtOrTrunc(NewCttzElts, dl, Op.getValueType());
  }
  case Intrinsic::experimental_vector_match: {
    return LowerVectorMatch(Op, DAG);
  }
  }
}
@@ -27153,6 +27235,7 @@ void AArch64TargetLowering::ReplaceNodeResults(
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::experimental_vector_match:
    case Intrinsic::get_active_lane_mask: {
      if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i1)
        return;
@@ -985,6 +985,8 @@ public:

  bool shouldExpandCttzElements(EVT VT) const override;

  bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll (new file, 542 lines)
@@ -0,0 +1,542 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s

define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: mov z1.b, b1
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: umov w8, v1.h[1]
; CHECK-NEXT: umov w9, v1.h[0]
; CHECK-NEXT: umov w10, v1.h[2]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: umov w8, v1.h[3]
; CHECK-NEXT: mov z1.b, w10
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z2.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z1.b
; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
; CHECK-NEXT: mov p2.b, p3/m, p3.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z1.q, q1
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: dup v1.16b, v1.b[0]
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: dup v3.16b, v1.b[4]
; CHECK-NEXT: dup v1.16b, v1.b[0]
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: dup v3.16b, v1.b[2]
; CHECK-NEXT: dup v4.16b, v1.b[0]
; CHECK-NEXT: dup v5.16b, v1.b[4]
; CHECK-NEXT: dup v1.16b, v1.b[6]
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b
; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.16b, v2.16b, #7
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.16b, v2.16b, #7
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.8b, v2.8b, #7
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask)
  ret <8 x i1> %r
}

define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv8i16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z1.q, q1
; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %r = tail call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask)
  ret <vscale x 8 x i1> %r
}

define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v2.8h, v2.8h, #15
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
  %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask)
  ret <8 x i1> %r
}

; Cases where op2 has more elements than op1.

define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.8b, v2.8b, #7
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
  ret <8 x i1> %r
}

define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z3.b, z1.b[1]
; CHECK-NEXT: mov z4.b, b1
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z5.b, z1.b[2]
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z3.b, z1.b[3]
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z5.b
; CHECK-NEXT: mov z4.b, z1.b[4]
; CHECK-NEXT: mov p2.b, p3/m, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[5]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[6]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[7]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[8]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[9]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[10]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[11]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[12]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[13]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[14]
; CHECK-NEXT: mov z1.b, z1.b[15]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, b2
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[1]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[2]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[3]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[4]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[5]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[6]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[7]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[8]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[9]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[10]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[11]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[12]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[13]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[14]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[15]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z1.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask)
  ret <vscale x 16 x i1> %r
}

define <16 x i1> @match_v16i8_v32i8(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v4.16b, v1.b[1]
; CHECK-NEXT: dup v5.16b, v1.b[0]
; CHECK-NEXT: dup v6.16b, v1.b[2]
; CHECK-NEXT: dup v7.16b, v1.b[3]
; CHECK-NEXT: dup v16.16b, v1.b[4]
; CHECK-NEXT: dup v17.16b, v1.b[5]
; CHECK-NEXT: dup v18.16b, v1.b[6]
; CHECK-NEXT: dup v19.16b, v1.b[7]
; CHECK-NEXT: dup v20.16b, v1.b[8]
; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b
; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT: cmeq v6.16b, v0.16b, v6.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v7.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v17.16b
; CHECK-NEXT: dup v21.16b, v2.b[7]
; CHECK-NEXT: dup v22.16b, v1.b[10]
; CHECK-NEXT: orr v4.16b, v5.16b, v4.16b
; CHECK-NEXT: orr v5.16b, v6.16b, v7.16b
; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v18.16b, v1.b[9]
; CHECK-NEXT: dup v19.16b, v1.b[11]
; CHECK-NEXT: dup v20.16b, v1.b[12]
; CHECK-NEXT: cmeq v22.16b, v0.16b, v22.16b
; CHECK-NEXT: orr v4.16b, v4.16b, v5.16b
; CHECK-NEXT: orr v5.16b, v6.16b, v7.16b
; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: dup v18.16b, v1.b[13]
; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v19.16b, v2.b[0]
; CHECK-NEXT: dup v20.16b, v2.b[1]
; CHECK-NEXT: orr v4.16b, v4.16b, v5.16b
; CHECK-NEXT: dup v5.16b, v2.b[6]
; CHECK-NEXT: orr v6.16b, v6.16b, v7.16b
; CHECK-NEXT: orr v7.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v18.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v18.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v19.16b, v2.b[2]
; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v21.16b, v2.b[8]
; CHECK-NEXT: orr v6.16b, v6.16b, v22.16b
; CHECK-NEXT: orr v7.16b, v7.16b, v16.16b
; CHECK-NEXT: dup v16.16b, v1.b[14]
; CHECK-NEXT: dup v1.16b, v1.b[15]
; CHECK-NEXT: orr v17.16b, v17.16b, v18.16b
; CHECK-NEXT: cmeq v18.16b, v0.16b, v19.16b
; CHECK-NEXT: dup v19.16b, v2.b[3]
; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v21.16b, v2.b[9]
; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b
; CHECK-NEXT: cmeq v1.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v4.16b, v4.16b, v6.16b
; CHECK-NEXT: orr v17.16b, v17.16b, v18.16b
; CHECK-NEXT: cmeq v18.16b, v0.16b, v19.16b
; CHECK-NEXT: dup v19.16b, v2.b[4]
; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v21.16b, v2.b[10]
; CHECK-NEXT: orr v7.16b, v7.16b, v16.16b
; CHECK-NEXT: orr v16.16b, v17.16b, v18.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v19.16b
; CHECK-NEXT: dup v18.16b, v2.b[5]
; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT: cmeq v19.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v20.16b, v2.b[11]
; CHECK-NEXT: orr v1.16b, v7.16b, v1.16b
; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: dup v17.16b, v2.b[12]
; CHECK-NEXT: orr v5.16b, v5.16b, v19.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v18.16b, v2.b[13]
; CHECK-NEXT: dup v19.16b, v2.b[14]
; CHECK-NEXT: orr v1.16b, v4.16b, v1.16b
; CHECK-NEXT: dup v2.16b, v2.b[15]
; CHECK-NEXT: orr v4.16b, v6.16b, v7.16b
; CHECK-NEXT: cmeq v6.16b, v0.16b, v17.16b
; CHECK-NEXT: orr v5.16b, v5.16b, v16.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v0.16b, v0.16b, v2.16b
; CHECK-NEXT: orr v1.16b, v1.16b, v4.16b
; CHECK-NEXT: orr v4.16b, v5.16b, v6.16b
; CHECK-NEXT: orr v5.16b, v7.16b, v16.16b
; CHECK-NEXT: orr v1.16b, v1.16b, v4.16b
; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
  %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask)
  ret <16 x i1> %r
}

; Data types not supported by MATCH.
; Note: The cases for SVE could be made tighter.

define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv4xi32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z2.s, z1.s[1]
; CHECK-NEXT: mov z3.s, s1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z4.s, z1.s[2]
; CHECK-NEXT: mov z1.s, z1.s[3]
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z2.s
; CHECK-NEXT: cmpeq p3.s, p1/z, z0.s, z3.s
; CHECK-NEXT: cmpeq p4.s, p1/z, z0.s, z4.s
; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: mov p2.b, p3/m, p3.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %r = tail call <vscale x 4 x i1> @llvm.experimental.vector.match(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask)
  ret <vscale x 4 x i1> %r
}

define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv2xi64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z2.d, z1.d[1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z2.d
; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: ret
  %r = tail call <vscale x 2 x i1> @llvm.experimental.vector.match(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask)
  ret <vscale x 2 x i1> %r
}

define <4 x i1> @match_v4xi32_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: match_v4xi32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v3.4s, v1.s[1]
; CHECK-NEXT: dup v4.4s, v1.s[0]
; CHECK-NEXT: dup v5.4s, v1.s[2]
; CHECK-NEXT: dup v1.4s, v1.s[3]
; CHECK-NEXT: cmeq v3.4s, v0.4s, v3.4s
; CHECK-NEXT: cmeq v4.4s, v0.4s, v4.4s
; CHECK-NEXT: cmeq v5.4s, v0.4s, v5.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: ret
  %r = tail call <4 x i1> @llvm.experimental.vector.match(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask)
  ret <4 x i1> %r
}

define <2 x i1> @match_v2xi64_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: match_v2xi64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v3.2d, v1.d[1]
; CHECK-NEXT: dup v1.2d, v1.d[0]
; CHECK-NEXT: cmeq v3.2d, v0.2d, v3.2d
; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: ret
  %r = tail call <2 x i1> @llvm.experimental.vector.match(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask)
  ret <2 x i1> %r
}

attributes #0 = { "target-features"="+sve2" }