[RISCV][VLOPT] Add vector indexed loads and stores to getOperandInfo (#119748)

Use `MO.getOperandNo() == 0` instead of `IsMODef` so naming is clear for the store, since the store should treat its operand 0 like that even though it is not a def.The load should treat its operand 0 def in the same way.
This commit is contained in:
Michael Maitland 2024-12-17 23:51:45 -05:00 committed by GitHub
parent 644643a4ee
commit a61eeaa748
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 577 additions and 535 deletions

View File

@ -270,6 +270,43 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VSSE64_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
// Vector Indexed Instructions
// vs(o|u)xei<eew>.v
// Dest/Data (operand 0) EEW=SEW, EMUL=LMUL. Source EEW=<eew> and
// EMUL=(EEW/SEW)*LMUL.
case RISCV::VLUXEI8_V:
case RISCV::VLOXEI8_V:
case RISCV::VSUXEI8_V:
case RISCV::VSOXEI8_V: {
if (MO.getOperandNo() == 0)
return OperandInfo(MIVLMul, MILog2SEW);
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
}
case RISCV::VLUXEI16_V:
case RISCV::VLOXEI16_V:
case RISCV::VSUXEI16_V:
case RISCV::VSOXEI16_V: {
if (MO.getOperandNo() == 0)
return OperandInfo(MIVLMul, MILog2SEW);
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
}
case RISCV::VLUXEI32_V:
case RISCV::VLOXEI32_V:
case RISCV::VSUXEI32_V:
case RISCV::VSOXEI32_V: {
if (MO.getOperandNo() == 0)
return OperandInfo(MIVLMul, MILog2SEW);
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
}
case RISCV::VLUXEI64_V:
case RISCV::VLOXEI64_V:
case RISCV::VSUXEI64_V:
case RISCV::VSOXEI64_V: {
if (MO.getOperandNo() == 0)
return OperandInfo(MIVLMul, MILog2SEW);
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
}
// Vector Integer Arithmetic Instructions
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:

View File

@ -10,10 +10,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
define <vscale x 4 x i32> @test_vloxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -30,10 +30,10 @@ entry:
define <vscale x 4 x i32> @test_vloxei2(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 14
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -50,10 +50,10 @@ entry:
define <vscale x 4 x i32> @test_vloxei3(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 26
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -74,9 +74,8 @@ define <vscale x 4 x i32> @test_vloxei4(ptr %ptr, <vscale x 4 x i8> %offset, <vs
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8, v0.t
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -100,10 +99,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
define <vscale x 4 x i32> @test_vloxei5(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vzext.vf2 v9, v8
; CHECK-NEXT: vsll.vi v10, v9, 12
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei16.v v8, (a0), v10
; CHECK-NEXT: ret
entry:
@ -121,12 +120,12 @@ define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64
; CHECK-LABEL: test_vloxei6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 127
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vand.vx v8, v8, a2
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -146,8 +145,9 @@ define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vsll.vi v12, v8, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -172,10 +172,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
define <vscale x 4 x i32> @test_vloxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vloxei_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vloxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
@ -199,10 +199,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
define <vscale x 4 x i32> @test_vluxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vluxei:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vluxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -227,10 +227,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
define <vscale x 4 x i32> @test_vluxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vluxei_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v8
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vluxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
@ -254,10 +254,10 @@ declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
define void @test_vsoxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vsoxei:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v10
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vsoxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -281,10 +281,10 @@ declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
define void @test_vsoxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vsoxei_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v10
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:
@ -308,10 +308,10 @@ declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
define void @test_vsuxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vsuxei:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v10
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vsuxei64.v v8, (a0), v12
; CHECK-NEXT: ret
entry:
@ -335,10 +335,10 @@ declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
define void @test_vsuxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vsuxei_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vzext.vf8 v12, v10
; CHECK-NEXT: vsll.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vsuxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
entry:

View File

@ -5,11 +5,10 @@ define void @snork(ptr %arg, <vscale x 2 x i64> %arg1) {
; CHECK-LABEL: snork:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsoxei64.v v10, (a0), v8
; CHECK-NEXT: ret
bb:

View File

@ -573,6 +573,108 @@ body: |
PseudoVSSE8_V_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */
...
---
name: vsuxeiN_v_data
body: |
bb.0:
; CHECK-LABEL: name: vsuxeiN_v_data
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoVSUXEI8_V_M1_M1 %x, $noreg, $noreg, 1, 3 /* e8 */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
PseudoVSUXEI8_V_M1_M1 %x, $noreg, $noreg, 1, 3 /* e8 */
...
---
name: vsuxeiN_v_data_incompatible_eew
body: |
bb.0:
; CHECK-LABEL: name: vsuxeiN_v_data_incompatible_eew
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoVSUXEI8_V_M1_M1 %x, $noreg, $noreg, 1, 3 /* e8 */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
PseudoVSUXEI8_V_M1_M1 %x, $noreg, $noreg, 1, 3 /* e8 */
...
---
name: vsuxeiN_v_data_incompatible_emul
body: |
bb.0:
; CHECK-LABEL: name: vsuxeiN_v_data_incompatible_emul
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoVSUXEI8_V_MF2_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
PseudoVSUXEI8_V_MF2_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */
...
---
name: vsuxeiN_v_idx
body: |
bb.0:
; CHECK-LABEL: name: vsuxeiN_v_idx
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoVSUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
PseudoVSUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */
...
---
name: vsuxeiN_v_idx_incompatible_eew
body: |
bb.0:
; CHECK-LABEL: name: vsuxeiN_v_idx_incompatible_eew
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoVSUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
%y:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
PseudoVSUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */
...
---
name: vsuxeiN_v_idx_incompatible_emul
body: |
bb.0:
; CHECK-LABEL: name: vsuxeiN_v_idx_incompatible_emul
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoVSUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
PseudoVSUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */
...
---
name: vluxeiN_v_data
body: |
bb.0:
; CHECK-LABEL: name: vluxeiN_v_data
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
%y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
...
---
name: vluxeiN_v_incompatible_eew
body: |
bb.0:
; CHECK-LABEL: name: vluxeiN_v_incompatible_eew
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
%y:vr = PseudoVLUXEI8_V_M1_M1 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
...
---
name: vluxeiN_v_data_incompatible_emul
body: |
bb.0:
; CHECK-LABEL: name: vluxeiN_v_data_incompatible_emul
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vr = PseudoVLUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 /* tu, mu */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
%y:vr = PseudoVLUXEI8_V_MF2_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0
...
---
name: vluxeiN_v_idx
body: |
bb.0:
; CHECK-LABEL: name: vluxeiN_v_idx
; CHECK: %x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: early-clobber %y:vr = PseudoVLUXEI8_V_MF2_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0 /* tu, mu */
%x:vr = PseudoVADD_VV_MF2 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
%y:vr = PseudoVLUXEI8_V_MF2_M1 $noreg, $noreg, %x, 1, 4 /* e16 */, 0
...
---
name: vmop_mm
body: |
bb.0:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff