mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-14 02:46:08 +00:00
[X86] Add INSERT_SUBVECTOR to ComputeNumSignBits
This adds support for calculating the number of sign bits of an insert_subvector node. The implementation is based on the corresponding logic in computeKnownBits. The motivating case is propagating sign-bit information across basic blocks on AVX targets, where concatenating vectors using insert_subvector is common. Differential Revision: https://reviews.llvm.org/D56283 llvm-svn: 350432
This commit is contained in:
parent
6b3153ada0
commit
cfeb1cf9af
@ -3693,7 +3693,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
|
|||||||
}
|
}
|
||||||
return ComputeNumSignBits(Src, Depth + 1);
|
return ComputeNumSignBits(Src, Depth + 1);
|
||||||
}
|
}
|
||||||
case ISD::CONCAT_VECTORS:
|
case ISD::CONCAT_VECTORS: {
|
||||||
// Determine the minimum number of sign bits across all demanded
|
// Determine the minimum number of sign bits across all demanded
|
||||||
// elts of the input vectors. Early out if the result is already 1.
|
// elts of the input vectors. Early out if the result is already 1.
|
||||||
Tmp = std::numeric_limits<unsigned>::max();
|
Tmp = std::numeric_limits<unsigned>::max();
|
||||||
@ -3711,6 +3711,40 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
|
|||||||
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
|
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
|
||||||
return Tmp;
|
return Tmp;
|
||||||
}
|
}
|
||||||
|
case ISD::INSERT_SUBVECTOR: {
|
||||||
|
// If we know the element index, demand any elements from the subvector and
|
||||||
|
// the remainder from the src it's inserted into, otherwise demand them all.
|
||||||
|
SDValue Src = Op.getOperand(0);
|
||||||
|
SDValue Sub = Op.getOperand(1);
|
||||||
|
auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
|
||||||
|
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
|
||||||
|
if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
|
||||||
|
Tmp = std::numeric_limits<unsigned>::max();
|
||||||
|
uint64_t Idx = SubIdx->getZExtValue();
|
||||||
|
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
|
||||||
|
if (!!DemandedSubElts) {
|
||||||
|
Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
|
||||||
|
if (Tmp == 1) return 1; // early-out
|
||||||
|
}
|
||||||
|
APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
|
||||||
|
APInt DemandedSrcElts = DemandedElts & ~SubMask;
|
||||||
|
if (!!DemandedSrcElts) {
|
||||||
|
Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
|
||||||
|
Tmp = std::min(Tmp, Tmp2);
|
||||||
|
}
|
||||||
|
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
|
||||||
|
return Tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not able to determine the index so just assume worst case.
|
||||||
|
Tmp = ComputeNumSignBits(Sub, Depth + 1);
|
||||||
|
if (Tmp == 1) return 1; // early-out
|
||||||
|
Tmp2 = ComputeNumSignBits(Src, Depth + 1);
|
||||||
|
Tmp = std::min(Tmp, Tmp2);
|
||||||
|
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
|
||||||
|
return Tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If we are looking at the loaded value of the SDNode.
|
// If we are looking at the loaded value of the SDNode.
|
||||||
if (Op.getResNo() == 0) {
|
if (Op.getResNo() == 0) {
|
||||||
|
@ -394,20 +394,11 @@ define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32
|
|||||||
; X32-LABEL: cross_bb_signbits_insert_subvec:
|
; X32-LABEL: cross_bb_signbits_insert_subvec:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: vextractf128 $1, %ymm0, %xmm3
|
; X32-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
; X32-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||||
; X32-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
|
; X32-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
|
||||||
; X32-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
; X32-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
|
||||||
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||||
; X32-NEXT: vextractf128 $1, %ymm0, %xmm3
|
|
||||||
; X32-NEXT: vpsllw $7, %xmm3, %xmm3
|
|
||||||
; X32-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
||||||
; X32-NEXT: vpand %xmm4, %xmm3, %xmm3
|
|
||||||
; X32-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3
|
|
||||||
; X32-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
||||||
; X32-NEXT: vpand %xmm4, %xmm0, %xmm0
|
|
||||||
; X32-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
|
|
||||||
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
|
||||||
; X32-NEXT: vandnps %ymm1, %ymm0, %ymm1
|
; X32-NEXT: vandnps %ymm1, %ymm0, %ymm1
|
||||||
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
|
; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
|
||||||
; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
|
; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||||
@ -417,20 +408,11 @@ define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32
|
|||||||
;
|
;
|
||||||
; X64-LABEL: cross_bb_signbits_insert_subvec:
|
; X64-LABEL: cross_bb_signbits_insert_subvec:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
|
; X64-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||||
; X64-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
|
; X64-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
|
||||||
; X64-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
; X64-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
|
||||||
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||||
; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
|
|
||||||
; X64-NEXT: vpsllw $7, %xmm3, %xmm3
|
|
||||||
; X64-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
||||||
; X64-NEXT: vpand %xmm4, %xmm3, %xmm3
|
|
||||||
; X64-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3
|
|
||||||
; X64-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
||||||
; X64-NEXT: vpand %xmm4, %xmm0, %xmm0
|
|
||||||
; X64-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
|
|
||||||
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
|
||||||
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm1
|
; X64-NEXT: vandnps %ymm1, %ymm0, %ymm1
|
||||||
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
|
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user