[AArch64][GlobalISel] Improve legalization of G_PTR_ADD (#91763)

The testing we have for vector ptradd was a bit lacking. In adding tests
this patch found a couple of issues mostly with the way v3 vectors of
ptrs were sometimes legalized via i64, and with non-i64 additions. It
does not attempt to fix the issue with mergevalues from returning vector
ptrs.
This commit is contained in:
David Green 2024-05-13 21:58:41 +01:00 committed by GitHub
parent 55e59083cb
commit 34de2151e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 227 additions and 6 deletions

View File

@ -69,8 +69,9 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
unsigned EltSize = OrigTy.getScalarSizeInBits();
if (LeftoverSize % EltSize != 0)
return {-1, -1};
LeftoverTy = LLT::scalarOrVector(
ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
LeftoverTy =
LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
OrigTy.getElementType());
} else {
LeftoverTy = LLT::scalar(LeftoverSize);
}
@ -212,7 +213,7 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
appendVectorElts(AllElts, PartRegs[i]);
Register Leftover = PartRegs[PartRegs.size() - 1];
if (MRI.getType(Leftover).isScalar())
if (!MRI.getType(Leftover).isVector())
AllElts.push_back(Leftover);
else
appendVectorElts(AllElts, Leftover);

View File

@ -177,9 +177,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s64}, {v2p0, v2s64}})
.clampScalar(1, s64, s64)
.clampNumElements(0, v2p0, v2p0)
.clampNumElements(1, v2s64, v2s64);
.clampScalarOrElt(1, s64, s64)
.clampNumElements(0, v2p0, v2p0);
getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

View File

@ -0,0 +1,221 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-none-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Note: these tests use stores instead of returns as the return handling for
; vector ptrs is currently sometimes create invalid unmerge values.
define void @vector_gep_i32(ptr %b, i32 %off, ptr %p) {
; CHECK-LABEL: vector_gep_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add x8, x0, w1, sxtw
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: ret
entry:
%g = getelementptr i8, ptr %b, i32 %off
store ptr %g, ptr %p
ret void
}
define void @vector_gep_i64(ptr %b, i64 %off, ptr %p) {
; CHECK-LABEL: vector_gep_i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: ret
entry:
%g = getelementptr i8, ptr %b, i64 %off
store ptr %g, ptr %p
ret void
}
define void @vector_gep_v1i32(<1 x ptr> %b, <1 x i32> %off, ptr %p) {
; CHECK-SD-LABEL: vector_gep_v1i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl d1, d1, #32
; CHECK-SD-NEXT: ssra d0, d1, #32
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_gep_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: add x8, x9, w8, sxtw
; CHECK-GI-NEXT: str x8, [x0]
; CHECK-GI-NEXT: ret
entry:
%g = getelementptr i8, <1 x ptr> %b, <1 x i32> %off
store <1 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v2i32(<2 x ptr> %b, <2 x i32> %off, ptr %p) {
; CHECK-LABEL: vector_gep_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: saddw v0.2d, v0.2d, v1.2s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
entry:
%g = getelementptr i8, <2 x ptr> %b, <2 x i32> %off
store <2 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v3i32(<3 x ptr> %b, <3 x i32> %off, ptr %p) {
; CHECK-SD-LABEL: vector_gep_v3i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: saddw2 v2.2d, v2.2d, v3.4s
; CHECK-SD-NEXT: str d2, [x0, #16]
; CHECK-SD-NEXT: saddw v0.2d, v0.2d, v3.2s
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_gep_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: smov x8, v3.s[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: smov x9, v3.s[1]
; CHECK-GI-NEXT: mov s3, v3.s[2]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: add x8, x8, w9, sxtw
; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: str x8, [x0, #16]
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%g = getelementptr i8, <3 x ptr> %b, <3 x i32> %off
store <3 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v4i32(<4 x ptr> %b, <4 x i32> %off, ptr %p) {
; CHECK-SD-LABEL: vector_gep_v4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: saddw2 v1.2d, v1.2d, v2.4s
; CHECK-SD-NEXT: saddw v0.2d, v0.2d, v2.2s
; CHECK-SD-NEXT: stp q0, q1, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_gep_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: saddw v0.2d, v0.2d, v2.2s
; CHECK-GI-NEXT: saddw2 v1.2d, v1.2d, v2.4s
; CHECK-GI-NEXT: stp q0, q1, [x0]
; CHECK-GI-NEXT: ret
entry:
%g = getelementptr i8, <4 x ptr> %b, <4 x i32> %off
store <4 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v1i64(<1 x ptr> %b, <1 x i64> %off, ptr %p) {
; CHECK-SD-LABEL: vector_gep_v1i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: add d0, d0, d1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_gep_v1i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: add x8, x8, x9
; CHECK-GI-NEXT: str x8, [x0]
; CHECK-GI-NEXT: ret
entry:
%g = getelementptr i8, <1 x ptr> %b, <1 x i64> %off
store <1 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v2i64(<2 x ptr> %b, <2 x i64> %off, ptr %p) {
; CHECK-LABEL: vector_gep_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
entry:
%g = getelementptr i8, <2 x ptr> %b, <2 x i64> %off
store <2 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v3i64(<3 x ptr> %b, <3 x i64> %off, ptr %p) {
; CHECK-SD-LABEL: vector_gep_v3i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: add d1, d2, d5
; CHECK-SD-NEXT: str d1, [x0, #16]
; CHECK-SD-NEXT: add v0.2d, v0.2d, v3.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_gep_v3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: fmov x9, d5
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: add x8, x8, x9
; CHECK-GI-NEXT: str x8, [x0, #16]
; CHECK-GI-NEXT: add v0.2d, v0.2d, v3.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%g = getelementptr i8, <3 x ptr> %b, <3 x i64> %off
store <3 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v4i64(<4 x ptr> %b, <4 x i64> %off, ptr %p) {
; CHECK-SD-LABEL: vector_gep_v4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: stp q0, q1, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_gep_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: stp q0, q1, [x0]
; CHECK-GI-NEXT: ret
entry:
%g = getelementptr i8, <4 x ptr> %b, <4 x i64> %off
store <4 x ptr> %g, ptr %p
ret void
}
define void @vector_gep_v4i128(<2 x ptr> %b, <2 x i128> %off, ptr %p) {
; CHECK-LABEL: vector_gep_v4i128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d1, x0
; CHECK-NEXT: mov v1.d[1], x2
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: str q0, [x4]
; CHECK-NEXT: ret
entry:
%g = getelementptr i8, <2 x ptr> %b, <2 x i128> %off
store <2 x ptr> %g, ptr %p
ret void
}