[DAG] Attempt to fold bswap(shl(x,c)) -> zext(bswap(trunc(shl(x,c-bw/2))))

If the shl amount is at least half the bitwidth (i.e. the lower half of the bswap source is known to be zero), then we can reduce the shift amount by half the bitwidth, perform the bswap at half the bitwidth, and zero-extend the result back to the original width.

Based off PR51391 + PR53867

Differential Revision: https://reviews.llvm.org/D120192
This commit is contained in:
Simon Pilgrim 2022-02-24 19:18:37 +00:00
parent b3e9fdd170
commit 370ebc9d9a
5 changed files with 59 additions and 28 deletions

View File

@ -9610,6 +9610,26 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
}
// fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
// iff c >= bw/2 (i.e. lower half is known zero)
unsigned BW = VT.getScalarSizeInBits();
if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
ShAmt->getZExtValue() >= (BW / 2) &&
(ShAmt->getZExtValue() % 16) == 0 && TLI.isTruncateFree(VT, HalfVT) &&
(!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
SDValue Res = N0.getOperand(0);
if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
Res = DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
return SDValue();
}

View File

@ -442,8 +442,8 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0

View File

@ -499,8 +499,8 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*

View File

@ -87,17 +87,16 @@ define void @demand_one_loaded_byte(i64* %xp, i32* %yp) {
define i64 @test_bswap64_shift48_zext(i16 %a0) {
; X86-LABEL: test_bswap64_shift48_zext:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $16, %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: test_bswap64_shift48_zext:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shlq $48, %rax
; X64-NEXT: bswapq %rax
; X64-NEXT: rolw $8, %di
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
%z = zext i16 %a0 to i64
%s = shl i64 %z, 48
@ -109,16 +108,15 @@ define i64 @test_bswap64_shift48(i64 %a0) {
; X86-LABEL: test_bswap64_shift48:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $16, %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: test_bswap64_shift48:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $48, %rax
; X64-NEXT: bswapq %rax
; X64-NEXT: rolw $8, %di
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
%s = shl i64 %a0, 48
%b = call i64 @llvm.bswap.i64(i64 %s)

View File

@ -1209,20 +1209,33 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; i8* p;
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzwl (%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
; BSWAP-LABEL: zext_load_i32_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movzwl (%eax), %eax
; BSWAP-NEXT: rolw $8, %ax
; BSWAP-NEXT: movzwl %ax, %eax
; BSWAP-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzwl (%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
; MOVBE-LABEL: zext_load_i32_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbew (%eax), %ax
; MOVBE-NEXT: movzwl %ax, %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: zext_load_i32_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movzwl (%rdi), %eax
; BSWAP64-NEXT: rolw $8, %ax
; BSWAP64-NEXT: movzwl %ax, %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: zext_load_i32_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbew (%rdi), %ax
; MOVBE64-NEXT: movzwl %ax, %eax
; MOVBE64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1