mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 18:06:05 +00:00
[AArch64] Change bound for known zero bits of uaddlv intrinsic
As noted in @efriedma's comment, the largest value a uaddlv of a v8i8 can produce is 8*255 = 2040, which needs only 11 bits, so the known-zero bound can be tightened from 16 bits to 11 bits. Additionally, the v16i8 type is now handled in the same way as v8i8, with a bound of 12 bits (16*255 = 4080). Differential Revision: https://reviews.llvm.org/D158613
This commit is contained in:
parent
834cdc8b64
commit
82e851a407
@ -2165,9 +2165,10 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
|
||||
case Intrinsic::aarch64_neon_uaddlv: {
|
||||
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
|
||||
unsigned BitWidth = Known.getBitWidth();
|
||||
if (VT == MVT::v8i8) {
|
||||
assert(BitWidth >= 16 && "Unexpected width!");
|
||||
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
|
||||
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
|
||||
unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
|
||||
assert(BitWidth >= Bound && "Unexpected width!");
|
||||
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
|
||||
Known.Zero |= Mask;
|
||||
}
|
||||
break;
|
||||
|
@ -300,7 +300,7 @@ define void @insert_vec_v8i8_uaddlv_from_v8i8(ptr %0) {
|
||||
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
|
||||
; CHECK-NEXT: uaddlv.8b h1, v0
|
||||
; CHECK-NEXT: mov.h v0[0], v1[0]
|
||||
; CHECK-NEXT: bic.4h v0, #255, lsl #8
|
||||
; CHECK-NEXT: bic.4h v0, #7, lsl #8
|
||||
; CHECK-NEXT: ushll.4s v0, v0, #0
|
||||
; CHECK-NEXT: ucvtf.4s v0, v0
|
||||
; CHECK-NEXT: str q0, [x0]
|
||||
|
@ -153,8 +153,8 @@ define i32 @saddlv4h_from_v4i16(ptr %A) nounwind {
|
||||
|
||||
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) nounwind readnone
|
||||
|
||||
define i32 @uaddlv_known_bits(<8 x i8> %a) {
|
||||
; CHECK-LABEL: uaddlv_known_bits:
|
||||
define i32 @uaddlv_known_bits_v8i8(<8 x i8> %a) {
|
||||
; CHECK-LABEL: uaddlv_known_bits_v8i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: uaddlv h0, v0.8b
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
@ -163,3 +163,17 @@ define i32 @uaddlv_known_bits(<8 x i8> %a) {
|
||||
%tmp2 = and i32 %tmp1, 65535
|
||||
ret i32 %tmp2
|
||||
}
|
||||
|
||||
declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone
|
||||
|
||||
; Calls the v16i8 uaddlv intrinsic and masks the i32 result with 65535.
; The expected output below is just uaddlv + fmov + ret, i.e. no separate
; instruction for the mask is expected to remain after codegen.
define i32 @uaddlv_known_bits_v16i8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: uaddlv_known_bits_v16i8:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: uaddlv h0, v0.16b
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
|
||||
%0 = and i32 %vaddlv.i, 65535
|
||||
ret i32 %0
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user