VirtRegRewriter: Add implicit register defs for live out undef lanes (#112679)

If an undef subregister def is live into another block, we need to
maintain a physreg def to track the liveness of those lanes. This
would manifest a verifier error after branch folding, when the cloned
tail block use no longer had a def.

We need to detect interference with other assigned intervals to avoid
clobbering the undef lanes defined in other intervals, since the undef
def didn't count as interference. This is pretty ugly and adds a new
dependency on LiveRegMatrix, keeping it live for one more pass. It also
adds a lot of implicit operand spam (we really should have a better
representation for this).

There is a missing verifier check for this situation. Added an xfailed
test that demonstrates this. We may also be able to revert the changes
in 47d3cbcf842a036c20c3f1c74255cdfc213f41c2.

It might be better to insert an IMPLICIT_DEF before the instruction
rather than using the implicit-def operand.

Fixes #98474
This commit is contained in:
Matt Arsenault 2024-10-28 17:33:53 -07:00 committed by GitHub
parent 61353cc1f6
commit 1ceccbb0dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 643 additions and 39 deletions

View File

@ -118,6 +118,16 @@ public:
/// the segment [Start, End).
bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
/// Check for interference in the segment [Start, End) that may prevent
/// assignment to PhysReg, like checkInterference. Returns a lane mask of
/// which lanes of the physical register interfere in the segment [Start, End)
/// of some other interval already assigned to PhysReg.
///
/// If this function returns LaneBitmask::getNone(), PhysReg is completely
/// free at the segment [Start, End).
LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End,
MCRegister PhysReg);
/// Assign VirtReg to PhysReg.
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
/// update VirtRegMap. The live range is expected to be available in PhysReg.

View File

@ -244,6 +244,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
return false;
}
LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start,
SlotIndex End,
MCRegister PhysReg) {
// Construct artificial live range containing only one segment [Start, End).
VNInfo valno(0, Start);
LiveRange::Segment Seg(Start, End, &valno);
LiveRange LR;
LR.addSegment(Seg);
LaneBitmask InterferingLanes;
// Check for interference with that segment
for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
auto [Unit, Lanes] = *MCRU;
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
// includes the address of the live range. If (for the same reg unit) this
// checkInterference overload is called twice, without any other query()
// calls in between (on heap-allocated LiveRanges) - which would invalidate
// the cached query - the LR address seen the second time may well be the
// same as that seen the first time, while the Start/End/valno may not - yet
// the same cached result would be fetched. To avoid that, we don't cache
// this query.
//
// FIXME: the usability of the Query API needs to be improved to avoid
// subtle bugs due to query identity. Avoiding caching, for example, would
// greatly simplify things.
LiveIntervalUnion::Query Q;
Q.reset(UserTag, LR, Matrix[Unit]);
if (Q.checkInterference())
InterferingLanes |= Lanes;
}
return InterferingLanes;
}
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
const LiveInterval *VRegInterval = nullptr;
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {

View File

@ -21,6 +21,7 @@
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@ -203,6 +204,7 @@ class VirtRegRewriter : public MachineFunctionPass {
MachineRegisterInfo *MRI = nullptr;
SlotIndexes *Indexes = nullptr;
LiveIntervals *LIS = nullptr;
LiveRegMatrix *LRM = nullptr;
VirtRegMap *VRM = nullptr;
LiveDebugVariables *DebugVars = nullptr;
DenseSet<Register> RewriteRegs;
@ -215,6 +217,9 @@ class VirtRegRewriter : public MachineFunctionPass {
void handleIdentityCopy(MachineInstr &MI);
void expandCopyBundle(MachineInstr &MI) const;
bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
LaneBitmask liveOutUndefPhiLanesForUndefSubregDef(
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
MCPhysReg PhysReg, const MachineInstr &MI) const;
public:
static char ID;
@ -247,6 +252,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
@ -262,6 +268,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addRequired<VirtRegMapWrapperLegacy>();
AU.addRequired<LiveRegMatrixWrapperLegacy>();
if (!ClearVirtRegs)
AU.addPreserved<LiveDebugVariables>();
@ -276,6 +283,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
MRI = &MF->getRegInfo();
Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
DebugVars = &getAnalysis<LiveDebugVariables>();
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
@ -548,6 +556,40 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
return false;
}
/// Compute a lanemask for undef lanes which need to be preserved out of the
/// defining block for a register assignment for a subregister def. \p PhysReg
/// is assigned to \p LI, which is the main range.
LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
MCPhysReg PhysReg, const MachineInstr &MI) const {
LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
LaneBitmask LiveOutUndefLanes;
for (const LiveInterval::SubRange &SR : LI.subranges()) {
// Figure out which lanes are undef live into a successor.
LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
for (const MachineBasicBlock *Succ : MBB.successors()) {
if (LIS->isLiveInToMBB(SR, Succ))
LiveOutUndefLanes |= NeedImpDefLanes;
}
}
}
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
LaneBitmask InterferingLanes =
LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg);
LiveOutUndefLanes &= ~InterferingLanes;
LLVM_DEBUG(if (LiveOutUndefLanes.any()) {
dbgs() << "Need live out undef defs for " << printReg(PhysReg)
<< LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n';
});
return LiveOutUndefLanes;
}
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<Register, 8> SuperDeads;
@ -602,6 +644,32 @@ void VirtRegRewriter::rewrite() {
MO.setIsUndef(true);
} else if (!MO.isDead()) {
assert(MO.isDef());
if (MO.isUndef()) {
const LiveInterval &LI = LIS->getInterval(VirtReg);
LaneBitmask LiveOutUndefLanes =
liveOutUndefPhiLanesForUndefSubregDef(LI, *MBBI, SubReg,
PhysReg, MI);
if (LiveOutUndefLanes.any()) {
SmallVector<unsigned, 16> CoveringIndexes;
// TODO: Just use one super register def if none of the lanes
// are needed?
if (!TRI->getCoveringSubRegIndexes(
*MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes,
CoveringIndexes))
llvm_unreachable(
"cannot represent required subregister defs");
// Try to represent the minimum needed live out def as a
// sequence of subregister defs.
//
// FIXME: It would be better if we could directly represent
// liveness with a lanemask instead of spamming operands.
for (unsigned SubIdx : CoveringIndexes)
SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
}
}
}
}

View File

@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr10 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3.Flow17:
@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.47, implicit $exec
@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec
@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: S_BRANCH %bb.45
; GFX90A-NEXT: {{ $}}
@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: S_BRANCH %bb.46
; GFX90A-NEXT: {{ $}}
@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: S_BRANCH %bb.62
; GFX90A-NEXT: {{ $}}
@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.53, implicit $exec
@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.57, implicit $exec
; GFX90A-NEXT: {{ $}}

View File

@ -32,7 +32,7 @@ body: |
; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec
@ -82,7 +82,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
; CHECK-NEXT: S_BRANCH %bb.6
; CHECK-NEXT: {{ $}}

View File

@ -30,7 +30,7 @@ body: |
; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF
; CHECK-NEXT: dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
@ -78,7 +78,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 32, 0 :: (invariant load (s256), addrspace 4)
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}

View File

@ -0,0 +1,55 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
---
name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr4_vgpr5
; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr3_vgpr4_vgpr5 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $vgpr2
%2:vgpr_32 = COPY $vgpr2
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
bb.3:
EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0
...

View File

@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
; Check for verifier error after tail duplication. An implicit_def of
; a subregsiter is needed to maintain liveness after assignment.
define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) {
; CHECK-LABEL: test:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %load
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: s_mov_b32 s2, s0
; CHECK-NEXT: s_mov_b32 s3, s0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: exp mrt0 v0, v1, v2, v0
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: exp mrt0 v0, v1, v2, v0
; CHECK-NEXT: s_endpgm
entry:
%cond = icmp eq i32 %cmp, 0
br i1 %cond, label %end, label %load
load:
%data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
%e1 = extractelement <2 x i32> %data1, i32 0
%e2 = extractelement <2 x i32> %data1, i32 1
br label %end
end:
%out1 = phi i32 [ 0, %entry ], [ %e1, %load ]
%out2 = phi i32 [ poison, %entry ], [ %e2, %load ]
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false)
ret void
}

View File

@ -0,0 +1,363 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
# The partial def of %0 introduces a live out undef def of %0.sub1
# into bb.3. We need to maintain this liveness with an explicit def of
# the physical subregister. Without this, a verifier error would
# appear after tail duplication.
---
name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
bb.3:
EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
bb.3:
EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr2, implicit-def $vgpr0
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
undef %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
bb.3:
EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# Test another use of the value before the block end.
---
name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1
; CHECK-NEXT: S_NOP 0, implicit renamable $vgpr0_vgpr1
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
S_NOP 0, implicit %0
S_BRANCH %bb.3
bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
bb.3:
EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# The undef subregister is not live out, no implicit def should be added for it
---
name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
bb.3:
EXP 0, killed %0.sub0, killed %0.sub0, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# In bb.2, %0 should be assigned to vgpr0_vgpr1. Make sure the value
# copied from $vgpr0 into %3 isn't clobbered by the undef phi def for
# %0.sub1.
---
name: assigned_physreg_subregister_interference
tracksRegLiveness: true
frameInfo:
adjustsStack: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
wwmReservedRegs:
- '$vgpr63'
body: |
; CHECK-LABEL: name: assigned_physreg_subregister_interference
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40
; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40
; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr34, 2, $vgpr40
; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr35, 3, $vgpr40
; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr36, 4, $vgpr40
; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr37, 5, $vgpr40
; CHECK-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $vgpr0_vgpr1:0x000000000000000F
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; CHECK-NEXT: renamable $sgpr5 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
; CHECK-NEXT: renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, killed $vgpr0_vgpr1, implicit $exec
; CHECK-NEXT: renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; CHECK-NEXT: renamable $vgpr1 = COPY $vgpr0, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = COPY renamable $sgpr34_sgpr35
; CHECK-NEXT: renamable $vgpr0 = V_ADD_U32_e32 1, killed $vgpr1, implicit $exec
; CHECK-NEXT: $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 5
; CHECK-NEXT: $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 4
; CHECK-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 3
; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 2
; CHECK-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 1
; CHECK-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0
; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: SI_RETURN implicit $vgpr0
bb.0:
liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1, $vgpr63
$vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr63
$vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr63
$vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr34, 2, $vgpr63
$vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr35, 3, $vgpr63
$vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr36, 4, $vgpr63
$vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr37, 5, $vgpr63
undef %0.sub0:vreg_64 = COPY $vgpr0
%0.sub1:vreg_64 = COPY $vgpr1
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
renamable $sgpr34_sgpr35 = S_MOV_B64 $exec
bb.1:
liveins: $vgpr63, $sgpr34_sgpr35
renamable $sgpr4 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec
renamable $sgpr5 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec
renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, %0, implicit $exec
renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec
bb.2:
liveins: $vgpr63, $sgpr4_sgpr5:0x000000000000000F, $sgpr34_sgpr35, $sgpr36_sgpr37
dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
%3:vgpr_32 = COPY $vgpr0
undef %0.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec
$exec = S_XOR_B64_term $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc
S_CBRANCH_EXECNZ %bb.1, implicit $exec
bb.3:
liveins: $vgpr63, $sgpr34_sgpr35
$exec = S_MOV_B64_term killed renamable $sgpr34_sgpr35
bb.4:
liveins: $vgpr63
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
%6:vgpr_32 = V_ADD_U32_e32 1, %3, implicit $exec
$vgpr0 = COPY %6
$sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 5
$sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 4
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 3
$sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2
$sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1
$sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
SI_RETURN implicit $vgpr0
...

View File

@ -0,0 +1,36 @@
# XFAIL: *
# RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=none -filetype=null %s
# FIXME: This should fail the machine verifier. There is a missing def
# of $vgpr2 in bb.1, which is needed since it's live into bb.3
---
name: missing_live_out_subreg_def
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
bb.1:
liveins: $vgpr0
renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
bb.2:
liveins: $vgpr0
renamable $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
bb.3:
liveins: $vgpr0, $vgpr1_vgpr2
EXP 0, killed renamable $vgpr0, killed renamable $vgpr1, renamable $vgpr2, renamable $vgpr0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...