[VP][RISCV] Add vp.ctlz/cttz and RISC-V support.

The patch also adds expandVPCTLZ and expandVPCTTZ to expand vp.ctlz/cttz
nodes, and adds cost modeling for vp.ctlz/cttz.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D140370
This commit is contained in:
Yeting Kuo 2022-12-20 13:24:01 +08:00
parent 63d46869ea
commit 1e9e1b9cf8
16 changed files with 25941 additions and 1 deletions

View File

@ -15394,6 +15394,8 @@ Semantics:
The '``llvm.ctpop``' intrinsic counts the 1's in a variable, or within
each element of a vector.
.. _int_ctlz:
'``llvm.ctlz.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -15438,6 +15440,8 @@ zeros in a variable, or within each element of the vector. If
if ``is_zero_poison == 0`` and ``poison`` otherwise. For example,
``llvm.ctlz(i32 2) = 30``.
.. _int_cttz:
'``llvm.cttz.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -22278,6 +22282,100 @@ Examples:
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
.. _int_vp_ctlz:
'``llvm.vp.ctlz.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x i32> @llvm.vp.ctlz.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>, i1 <is_zero_poison>)
declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>, i1 <is_zero_poison>)
declare <256 x i64> @llvm.vp.ctlz.v256i64 (<256 x i64> <op>, <256 x i1> <mask>, i32 <vector_length>, i1 <is_zero_poison>)
Overview:
"""""""""
Predicated ctlz of a vector of integers.
Arguments:
""""""""""
The first operand and the result have the same vector of integer type. The
second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation. The fourth operand is a constant flag that indicates whether the
result is a poison value if the first operand is zero.
Semantics:
""""""""""
The '``llvm.vp.ctlz``' intrinsic performs ctlz (:ref:`ctlz <int_ctlz>`) of the first operand on each
enabled lane. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
.. _int_vp_cttz:
'``llvm.vp.cttz.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x i32> @llvm.vp.cttz.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>, i1 <is_zero_poison>)
declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>, i1 <is_zero_poison>)
declare <256 x i64> @llvm.vp.cttz.v256i64 (<256 x i64> <op>, <256 x i1> <mask>, i32 <vector_length>, i1 <is_zero_poison>)
Overview:
"""""""""
Predicated cttz of a vector of integers.
Arguments:
""""""""""
The first operand and the result have the same vector of integer type. The
second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation. The fourth operand is a constant flag that indicates whether the
result is a poison value if the first operand is zero.
Semantics:
""""""""""
The '``llvm.vp.cttz``' intrinsic performs cttz (:ref:`cttz <int_cttz>`) of the first operand on each
enabled lane. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
.. _int_vp_fshl:
'``llvm.vp.fshl.*``' Intrinsics

View File

@ -4911,6 +4911,11 @@ public:
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
/// Expands to a VP popcount of the bit-smeared, inverted input
/// (ctlz(x) == popcount(~(x | x>>1 | x>>2 | ...))).
/// \param N Node to expand
/// \param DAG The SelectionDAG used to build the replacement nodes
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand CTTZ via Table Lookup.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
@ -4923,6 +4928,11 @@ public:
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
/// Expands to a VP popcount of the trailing-zero mask
/// (cttz(x) == popcount(~x & (x - 1))).
/// \param N Node to expand
/// \param DAG The SelectionDAG used to build the replacement nodes
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand ABS nodes. Expands vector/scalar ABS nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))

View File

@ -1817,6 +1817,19 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
llvm_i32_ty]>;
}
// Predicated bit-counting intrinsics. Operand order is: source vector,
// mask (same element count as the source), explicit vector length (i32),
// and an i1 is_zero_poison flag. ImmArg<ArgIndex<3>> requires the
// is_zero_poison flag to be an immediate constant at each call site.
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<3>>] in {
// llvm.vp.ctlz(x, mask, evl, is_zero_poison): predicated count-leading-zeros.
def int_vp_ctlz : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty,
llvm_i1_ty]>;
// llvm.vp.cttz(x, mask, evl, is_zero_poison): predicated count-trailing-zeros.
def int_vp_cttz : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty,
llvm_i1_ty]>;
}
def int_get_active_lane_mask:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyint_ty, LLVMMatchType<1>],

View File

@ -228,6 +228,22 @@ END_REGISTER_VP(vp_bitreverse, VP_BITREVERSE)
BEGIN_REGISTER_VP(vp_ctpop, 1, 2, VP_CTPOP, -1)
END_REGISTER_VP(vp_ctpop, VP_CTPOP)
// llvm.vp.ctlz(x, mask, vlen, is_zero_poison)
// One intrinsic, two SDNodes: the is_zero_poison immediate selects
// VP_CTLZ_ZERO_UNDEF vs. VP_CTLZ during SelectionDAG building (see
// getISDForVPIntrinsic), so both nodes are registered under vp_ctlz.
// Mask is operand 1, explicit vector length is operand 2.
BEGIN_REGISTER_VP_INTRINSIC(vp_ctlz, 1, 2)
BEGIN_REGISTER_VP_SDNODE(VP_CTLZ, -1, vp_ctlz, 1, 2)
END_REGISTER_VP_SDNODE(VP_CTLZ)
BEGIN_REGISTER_VP_SDNODE(VP_CTLZ_ZERO_UNDEF, -1, vp_ctlz_zero_undef, 1, 2)
END_REGISTER_VP_SDNODE(VP_CTLZ_ZERO_UNDEF)
END_REGISTER_VP_INTRINSIC(vp_ctlz)
// llvm.vp.cttz(x, mask, vlen, is_zero_poison)
// As with vp.ctlz, is_zero_poison selects VP_CTTZ_ZERO_UNDEF vs. VP_CTTZ.
BEGIN_REGISTER_VP_INTRINSIC(vp_cttz, 1, 2)
BEGIN_REGISTER_VP_SDNODE(VP_CTTZ, -1, vp_cttz, 1, 2)
END_REGISTER_VP_SDNODE(VP_CTTZ)
BEGIN_REGISTER_VP_SDNODE(VP_CTTZ_ZERO_UNDEF, -1, vp_cttz_zero_undef, 1, 2)
END_REGISTER_VP_SDNODE(VP_CTTZ_ZERO_UNDEF)
END_REGISTER_VP_INTRINSIC(vp_cttz)
// llvm.vp.fshl(x,y,z,mask,vlen)
BEGIN_REGISTER_VP(vp_fshl, 3, 4, VP_FSHL, -1)
END_REGISTER_VP(vp_fshl, VP_FSHL)

View File

@ -813,6 +813,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
case ISD::VP_CTLZ:
case ISD::VP_CTLZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
Results.push_back(Expanded);
return;
}
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
@ -820,6 +827,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
Results.push_back(Expanded);
return;
}
break;
case ISD::FSHL:
case ISD::VP_FSHL:
case ISD::FSHR:

View File

@ -1016,9 +1016,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BSWAP:
case ISD::VP_BSWAP:
case ISD::CTLZ:
case ISD::VP_CTLZ:
case ISD::CTTZ:
case ISD::VP_CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::VP_CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::VP_CTPOP:
case ISD::FABS: case ISD::VP_FABS:
@ -4097,11 +4101,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BSWAP:
case ISD::VP_BSWAP:
case ISD::CTLZ:
case ISD::VP_CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::VP_CTPOP:
case ISD::CTTZ:
case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::VP_CTTZ_ZERO_UNDEF:
case ISD::FNEG: case ISD::VP_FNEG:
case ISD::VP_FABS:
case ISD::VP_SQRT:

View File

@ -7437,6 +7437,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
std::optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
case Intrinsic::vp_ctlz: {
bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
break;
}
case Intrinsic::vp_cttz: {
bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
break;
}
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
ResOPC = ISD::VPSD; \
@ -7771,6 +7781,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
setValue(&VPIntrin, N);
break;
}
case ISD::VP_CTLZ:
case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF: {
// Pop is_zero_poison operand.
OpValues.pop_back();
SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
setValue(&VPIntrin, Result);
break;
}
}
}

View File

@ -8382,6 +8382,33 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  EVT ShAmtVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Res = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue EVL = Node->getOperand(2);
  unsigned BitWidth = VT.getScalarSizeInBits();
  // Smear the set bits rightward:
  //   x |= x >> 1; x |= x >> 2; ...; x |= x >> (BitWidth / 2);
  // Afterwards every bit at or below the highest set bit is one, so the
  // leading-zero count equals popcount(~x). All operations are performed
  // with the VP opcodes so the mask and EVL are honored on every lane.
  for (unsigned Shift = 1; Shift < BitWidth; Shift <<= 1) {
    SDValue ShAmt = DAG.getConstant(Shift, DL, ShAmtVT);
    SDValue Shifted = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, ShAmt, Mask, EVL);
    Res = DAG.getNode(ISD::VP_OR, DL, VT, Res, Shifted, Mask, EVL);
  }
  // Invert (xor with all-ones) and count the remaining set bits.
  Res = DAG.getNode(ISD::VP_XOR, DL, VT, Res, DAG.getConstant(-1, DL, VT),
                    Mask, EVL);
  return DAG.getNode(ISD::VP_CTPOP, DL, VT, Res, Mask, EVL);
}
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
const SDLoc &DL, EVT VT, SDValue Op,
unsigned BitWidth) const {
@ -8482,6 +8509,22 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue Src = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue EVL = Node->getOperand(2);
  // cttz(x) == popcount(~x & (x - 1)): the value ~x & (x - 1) has exactly
  // the trailing-zero bits of x set (and is all-ones when x == 0, giving
  // the bit width). Mirrors the vector path of expandCTTZ, but built from
  // VP opcodes so the mask and EVL apply lane-wise.
  SDValue Inverted = DAG.getNode(ISD::VP_XOR, DL, VT, Src,
                                 DAG.getConstant(-1, DL, VT), Mask, EVL);
  SDValue Decremented = DAG.getNode(ISD::VP_SUB, DL, VT, Src,
                                    DAG.getConstant(1, DL, VT), Mask, EVL);
  SDValue TrailMask =
      DAG.getNode(ISD::VP_AND, DL, VT, Inverted, Decremented, Mask, EVL);
  return DAG.getNode(ISD::VP_CTPOP, DL, VT, TrailMask, Mask, EVL);
}
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
bool IsNegative) const {
SDLoc dl(N);

View File

@ -611,7 +611,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction({ISD::VP_BSWAP, ISD::VP_BITREVERSE}, VT, Expand);
setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
setOperationAction(ISD::VP_CTPOP, VT, Expand);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},

View File

@ -746,6 +746,82 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
{Intrinsic::vp_ctpop, MVT::nxv2i64, 21},
{Intrinsic::vp_ctpop, MVT::nxv4i64, 21},
{Intrinsic::vp_ctpop, MVT::nxv8i64, 21},
{Intrinsic::vp_ctlz, MVT::v2i8, 19},
{Intrinsic::vp_ctlz, MVT::v4i8, 19},
{Intrinsic::vp_ctlz, MVT::v8i8, 19},
{Intrinsic::vp_ctlz, MVT::v16i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv1i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv2i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv4i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv8i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv16i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv32i8, 19},
{Intrinsic::vp_ctlz, MVT::nxv64i8, 19},
{Intrinsic::vp_ctlz, MVT::v2i16, 28},
{Intrinsic::vp_ctlz, MVT::v4i16, 28},
{Intrinsic::vp_ctlz, MVT::v8i16, 28},
{Intrinsic::vp_ctlz, MVT::v16i16, 28},
{Intrinsic::vp_ctlz, MVT::nxv1i16, 28},
{Intrinsic::vp_ctlz, MVT::nxv2i16, 28},
{Intrinsic::vp_ctlz, MVT::nxv4i16, 28},
{Intrinsic::vp_ctlz, MVT::nxv8i16, 28},
{Intrinsic::vp_ctlz, MVT::nxv16i16, 28},
{Intrinsic::vp_ctlz, MVT::nxv32i16, 28},
{Intrinsic::vp_ctlz, MVT::v2i32, 31},
{Intrinsic::vp_ctlz, MVT::v4i32, 31},
{Intrinsic::vp_ctlz, MVT::v8i32, 31},
{Intrinsic::vp_ctlz, MVT::v16i32, 31},
{Intrinsic::vp_ctlz, MVT::nxv1i32, 31},
{Intrinsic::vp_ctlz, MVT::nxv2i32, 31},
{Intrinsic::vp_ctlz, MVT::nxv4i32, 31},
{Intrinsic::vp_ctlz, MVT::nxv8i32, 31},
{Intrinsic::vp_ctlz, MVT::nxv16i32, 31},
{Intrinsic::vp_ctlz, MVT::v2i64, 35},
{Intrinsic::vp_ctlz, MVT::v4i64, 35},
{Intrinsic::vp_ctlz, MVT::v8i64, 35},
{Intrinsic::vp_ctlz, MVT::v16i64, 35},
{Intrinsic::vp_ctlz, MVT::nxv1i64, 35},
{Intrinsic::vp_ctlz, MVT::nxv2i64, 35},
{Intrinsic::vp_ctlz, MVT::nxv4i64, 35},
{Intrinsic::vp_ctlz, MVT::nxv8i64, 35},
{Intrinsic::vp_cttz, MVT::v2i8, 16},
{Intrinsic::vp_cttz, MVT::v4i8, 16},
{Intrinsic::vp_cttz, MVT::v8i8, 16},
{Intrinsic::vp_cttz, MVT::v16i8, 16},
{Intrinsic::vp_cttz, MVT::nxv1i8, 16},
{Intrinsic::vp_cttz, MVT::nxv2i8, 16},
{Intrinsic::vp_cttz, MVT::nxv4i8, 16},
{Intrinsic::vp_cttz, MVT::nxv8i8, 16},
{Intrinsic::vp_cttz, MVT::nxv16i8, 16},
{Intrinsic::vp_cttz, MVT::nxv32i8, 16},
{Intrinsic::vp_cttz, MVT::nxv64i8, 16},
{Intrinsic::vp_cttz, MVT::v2i16, 23},
{Intrinsic::vp_cttz, MVT::v4i16, 23},
{Intrinsic::vp_cttz, MVT::v8i16, 23},
{Intrinsic::vp_cttz, MVT::v16i16, 23},
{Intrinsic::vp_cttz, MVT::nxv1i16, 23},
{Intrinsic::vp_cttz, MVT::nxv2i16, 23},
{Intrinsic::vp_cttz, MVT::nxv4i16, 23},
{Intrinsic::vp_cttz, MVT::nxv8i16, 23},
{Intrinsic::vp_cttz, MVT::nxv16i16, 23},
{Intrinsic::vp_cttz, MVT::nxv32i16, 23},
{Intrinsic::vp_cttz, MVT::v2i32, 24},
{Intrinsic::vp_cttz, MVT::v4i32, 24},
{Intrinsic::vp_cttz, MVT::v8i32, 24},
{Intrinsic::vp_cttz, MVT::v16i32, 24},
{Intrinsic::vp_cttz, MVT::nxv1i32, 24},
{Intrinsic::vp_cttz, MVT::nxv2i32, 24},
{Intrinsic::vp_cttz, MVT::nxv4i32, 24},
{Intrinsic::vp_cttz, MVT::nxv8i32, 24},
{Intrinsic::vp_cttz, MVT::nxv16i32, 24},
{Intrinsic::vp_cttz, MVT::v2i64, 25},
{Intrinsic::vp_cttz, MVT::v4i64, 25},
{Intrinsic::vp_cttz, MVT::v8i64, 25},
{Intrinsic::vp_cttz, MVT::v16i64, 25},
{Intrinsic::vp_cttz, MVT::nxv1i64, 25},
{Intrinsic::vp_cttz, MVT::nxv2i64, 25},
{Intrinsic::vp_cttz, MVT::nxv4i64, 25},
{Intrinsic::vp_cttz, MVT::nxv8i64, 25},
};
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {

View File

@ -382,6 +382,216 @@ define void @vp_ctpop() {
ret void
}
; Cost-model coverage for llvm.vp.ctlz over fixed and scalable integer
; vector types (i8 through i64). The CHECK lines are FileCheck patterns
; matched against the cost-model analysis output, not against this input.
; NOTE(review): several call-site suffixes below are spelled "nvx" while the
; correct scalable-vector mangling is "nxv" (as the CHECK lines show); this
; appears to rely on the IR parser's intrinsic auto-remangling — confirm, and
; consider regenerating the calls (and their declare lines) with "nxv".
define void @vp_ctlz() {
; CHECK-LABEL: 'vp_ctlz'
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %22 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %23 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %24 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %25 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %32 = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %33 = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %34 = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %35 = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %41 = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %42 = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %43 = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %44 = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
call <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef, i1 false)
call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
ret void
}
define void @vp_cttz() {
; CHECK-LABEL: 'vp_cttz'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %22 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %23 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %24 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %25 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %32 = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %33 = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %34 = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %35 = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %41 = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %42 = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %43 = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %44 = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
call <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef, i1 false)
call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef, i1 false)
call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef, i1 false)
call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef, i1 false)
call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef, i1 false)
call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef, i1 false)
call <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef, i1 false)
call <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef, i1 false)
call <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef, i1 false)
call <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef, i1 false)
call <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef, i1 false)
ret void
}
declare i16 @llvm.bswap.i16(i16)
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
@ -559,3 +769,83 @@ declare <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64>, <vscale x
declare <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, <2 x i1>, i32, i1 immarg)
declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, <4 x i1>, i32, i1 immarg)
declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, <8 x i1>, i32, i1 immarg)
declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i1 immarg)
declare <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32, i1 immarg)
declare <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32, i1 immarg)
declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, <2 x i1>, i32, i1 immarg)
declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, <4 x i1>, i32, i1 immarg)
declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, <8 x i1>, i32, i1 immarg)
declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32, i1 immarg)
declare <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32, i1 immarg)
declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, <2 x i1>, i32, i1 immarg)
declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, <4 x i1>, i32, i1 immarg)
declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, <8 x i1>, i32, i1 immarg)
declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32, i1 immarg)
declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, <2 x i1>, i32, i1 immarg)
declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, <4 x i1>, i32, i1 immarg)
declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, <8 x i1>, i32, i1 immarg)
declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32, i1 immarg)
declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, <2 x i1>, i32, i1 immarg)
declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, <4 x i1>, i32, i1 immarg)
declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, <8 x i1>, i32, i1 immarg)
declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i1 immarg)
declare <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32, i1 immarg)
declare <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32, i1 immarg)
declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, <2 x i1>, i32, i1 immarg)
declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, <4 x i1>, i32, i1 immarg)
declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, <8 x i1>, i32, i1 immarg)
declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32, i1 immarg)
declare <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32, i1 immarg)
declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, <2 x i1>, i32, i1 immarg)
declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, <4 x i1>, i32, i1 immarg)
declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, <8 x i1>, i32, i1 immarg)
declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32, i1 immarg)
declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, <2 x i1>, i32, i1 immarg)
declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, <4 x i1>, i32, i1 immarg)
declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, <8 x i1>, i32, i1 immarg)
declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, <16 x i1>, i32, i1 immarg)
declare <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32, i1 immarg)
declare <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i1 immarg)
declare <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32, i1 immarg)
declare <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32, i1 immarg)
declare <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32, i1 immarg)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -152,6 +152,10 @@ protected:
<< "(<8 x i16>, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.ctpop.v8i16"
<< "(<8 x i16>, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.ctlz.v8i16"
<< "(<8 x i16>, <8 x i1>, i32, i1 immarg) ";
Str << " declare <8 x i16> @llvm.vp.cttz.v8i16"
<< "(<8 x i16>, <8 x i1>, i32, i1 immarg) ";
Str << " declare <8 x i16> @llvm.vp.fshl.v8i16"
<< "(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.fshr.v8i16"