mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 12:36:07 +00:00
AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis (#80003)
Implement PhiLoweringHelper for GlobalISel in DivergenceLoweringHelper. Use machine uniformity analysis to find divergent i1 phis and select them as lane mask phis in same way SILowerI1Copies select VReg_1 phis. Note that divergent i1 phis include phis created by LCSSA and all cases of uses outside of cycle are actually covered by "lowering LCSSA phis". GlobalISel lane masks are registers with sgpr register class and S1 LLT. TODO: General goal is that instructions created in this pass are fully instruction-selected so that selection of lane mask phis is not split across multiple passes. patch 3 from: https://github.com/llvm/llvm-project/pull/73337
This commit is contained in:
parent
89ec940b4a
commit
06f711a906
@ -752,6 +752,24 @@ public:
|
||||
Register createVirtualRegister(const TargetRegisterClass *RegClass,
|
||||
StringRef Name = "");
|
||||
|
||||
/// All attributes(register class or bank and low-level type) a virtual
|
||||
/// register can have.
|
||||
struct VRegAttrs {
|
||||
RegClassOrRegBank RCOrRB;
|
||||
LLT Ty;
|
||||
};
|
||||
|
||||
/// Returns register class or bank and low level type of \p Reg. Always safe
|
||||
/// to use. Special values are returned when \p Reg does not have some of the
|
||||
/// attributes.
|
||||
VRegAttrs getVRegAttrs(Register Reg) {
|
||||
return {getRegClassOrRegBank(Reg), getType(Reg)};
|
||||
}
|
||||
|
||||
/// Create and return a new virtual register in the function with the
|
||||
/// specified register attributes(register class or bank and low level type).
|
||||
Register createVirtualRegister(VRegAttrs RegAttr, StringRef Name = "");
|
||||
|
||||
/// Create and return a new virtual register in the function with the same
|
||||
/// attributes as the given register.
|
||||
Register cloneVirtualRegister(Register VReg, StringRef Name = "");
|
||||
|
@ -32,6 +32,25 @@ MachineUniformityInfo computeMachineUniformityInfo(
|
||||
MachineFunction &F, const MachineCycleInfo &cycleInfo,
|
||||
const MachineDomTree &domTree, bool HasBranchDivergence);
|
||||
|
||||
/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
|
||||
class MachineUniformityAnalysisPass : public MachineFunctionPass {
|
||||
MachineUniformityInfo UI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
MachineUniformityAnalysisPass();
|
||||
|
||||
MachineUniformityInfo &getUniformityInfo() { return UI; }
|
||||
const MachineUniformityInfo &getUniformityInfo() const { return UI; }
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
void print(raw_ostream &OS, const Module *M = nullptr) const override;
|
||||
|
||||
// TODO: verify analysis
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
|
||||
|
@ -167,6 +167,15 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
|
||||
return Reg;
|
||||
}
|
||||
|
||||
Register MachineRegisterInfo::createVirtualRegister(VRegAttrs RegAttr,
|
||||
StringRef Name) {
|
||||
Register Reg = createIncompleteVirtualRegister(Name);
|
||||
VRegInfo[Reg].first = RegAttr.RCOrRB;
|
||||
setType(Reg, RegAttr.Ty);
|
||||
noteNewVirtualRegister(Reg);
|
||||
return Reg;
|
||||
}
|
||||
|
||||
Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
|
||||
StringRef Name) {
|
||||
Register Reg = createIncompleteVirtualRegister(Name);
|
||||
|
@ -165,25 +165,6 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
|
||||
|
||||
namespace {
|
||||
|
||||
/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
|
||||
class MachineUniformityAnalysisPass : public MachineFunctionPass {
|
||||
MachineUniformityInfo UI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
MachineUniformityAnalysisPass();
|
||||
|
||||
MachineUniformityInfo &getUniformityInfo() { return UI; }
|
||||
const MachineUniformityInfo &getUniformityInfo() const { return UI; }
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
void print(raw_ostream &OS, const Module *M = nullptr) const override;
|
||||
|
||||
// TODO: verify analysis
|
||||
};
|
||||
|
||||
class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
@ -16,7 +16,11 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "SILowerI1Copies.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
|
||||
|
||||
@ -42,14 +46,146 @@ public:
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addRequired<MachinePostDominatorTree>();
|
||||
AU.addRequired<MachineUniformityAnalysisPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
|
||||
class DivergenceLoweringHelper : public PhiLoweringHelper {
|
||||
public:
|
||||
DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
|
||||
MachinePostDominatorTree *PDT,
|
||||
MachineUniformityInfo *MUI);
|
||||
|
||||
private:
|
||||
MachineUniformityInfo *MUI = nullptr;
|
||||
MachineIRBuilder B;
|
||||
Register buildRegCopyToLaneMask(Register Reg);
|
||||
|
||||
public:
|
||||
void markAsLaneMask(Register DstReg) const override;
|
||||
void getCandidatesForLowering(
|
||||
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
|
||||
void collectIncomingValuesFromPhi(
|
||||
const MachineInstr *MI,
|
||||
SmallVectorImpl<Incoming> &Incomings) const override;
|
||||
void replaceDstReg(Register NewReg, Register OldReg,
|
||||
MachineBasicBlock *MBB) override;
|
||||
void buildMergeLaneMasks(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, const DebugLoc &DL,
|
||||
Register DstReg, Register PrevReg,
|
||||
Register CurReg) override;
|
||||
void constrainAsLaneMask(Incoming &In) override;
|
||||
};
|
||||
|
||||
DivergenceLoweringHelper::DivergenceLoweringHelper(
|
||||
MachineFunction *MF, MachineDominatorTree *DT,
|
||||
MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
|
||||
: PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
|
||||
|
||||
// _(s1) -> SReg_32/64(s1)
|
||||
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
|
||||
assert(MRI->getType(DstReg) == LLT::scalar(1));
|
||||
|
||||
if (MRI->getRegClassOrNull(DstReg)) {
|
||||
if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
|
||||
return;
|
||||
llvm_unreachable("Failed to constrain register class");
|
||||
}
|
||||
|
||||
MRI->setRegClass(DstReg, ST->getBoolRC());
|
||||
}
|
||||
|
||||
void DivergenceLoweringHelper::getCandidatesForLowering(
|
||||
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
|
||||
LLT S1 = LLT::scalar(1);
|
||||
|
||||
// Add divergent i1 phis to the list
|
||||
for (MachineBasicBlock &MBB : *MF) {
|
||||
for (MachineInstr &MI : MBB.phis()) {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
|
||||
Vreg1Phis.push_back(&MI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
|
||||
const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
|
||||
for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
|
||||
Incomings.emplace_back(MI->getOperand(i).getReg(),
|
||||
MI->getOperand(i + 1).getMBB(), Register());
|
||||
}
|
||||
}
|
||||
|
||||
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
|
||||
MachineBasicBlock *MBB) {
|
||||
BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
|
||||
.addReg(NewReg);
|
||||
}
|
||||
|
||||
// Copy Reg to new lane mask register, insert a copy after instruction that
|
||||
// defines Reg while skipping phis if needed.
|
||||
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
|
||||
Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
|
||||
MachineInstr *Instr = MRI->getVRegDef(Reg);
|
||||
MachineBasicBlock *MBB = Instr->getParent();
|
||||
B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
|
||||
B.buildCopy(LaneMask, Reg);
|
||||
return LaneMask;
|
||||
}
|
||||
|
||||
// bb.previous
|
||||
// %PrevReg = ...
|
||||
//
|
||||
// bb.current
|
||||
// %CurReg = ...
|
||||
//
|
||||
// %DstReg - not defined
|
||||
//
|
||||
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
|
||||
//
|
||||
// bb.previous
|
||||
// %PrevReg = ...
|
||||
// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
|
||||
//
|
||||
// bb.current
|
||||
// %CurReg = ...
|
||||
// %CurRegCopy:sreg_32(s1) = COPY %CurReg
|
||||
// ...
|
||||
// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
|
||||
// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
|
||||
// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
|
||||
//
|
||||
// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
|
||||
void DivergenceLoweringHelper::buildMergeLaneMasks(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
|
||||
Register DstReg, Register PrevReg, Register CurReg) {
|
||||
// DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
|
||||
// TODO: check if inputs are constants or results of a compare.
|
||||
|
||||
Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
|
||||
Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
|
||||
Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
|
||||
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
|
||||
|
||||
B.setInsertPt(MBB, I);
|
||||
B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
|
||||
B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
|
||||
B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
|
||||
}
|
||||
|
||||
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
|
||||
|
||||
} // End anonymous namespace.
|
||||
|
||||
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
|
||||
"AMDGPU GlobalISel divergence lowering", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
|
||||
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
|
||||
"AMDGPU GlobalISel divergence lowering", false, false)
|
||||
|
||||
@ -64,5 +200,12 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
|
||||
|
||||
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
|
||||
MachineFunction &MF) {
|
||||
return false;
|
||||
MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
|
||||
MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
|
||||
MachineUniformityInfo &MUI =
|
||||
getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
|
||||
|
||||
DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
|
||||
|
||||
return Helper.lowerPhis();
|
||||
}
|
||||
|
@ -210,6 +210,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
|
||||
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
|
||||
const Register DefReg = I.getOperand(0).getReg();
|
||||
const LLT DefTy = MRI->getType(DefReg);
|
||||
|
||||
if (DefTy == LLT::scalar(1)) {
|
||||
if (!AllowRiskySelect) {
|
||||
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
|
||||
@ -3552,8 +3553,6 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
|
||||
if (I.isPHI())
|
||||
return selectPHI(I);
|
||||
|
||||
if (!I.isPreISelOpcode()) {
|
||||
if (I.isCopy())
|
||||
@ -3696,6 +3695,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
|
||||
return selectWaveAddress(I);
|
||||
case AMDGPU::G_STACKRESTORE:
|
||||
return selectStackRestore(I);
|
||||
case AMDGPU::G_PHI:
|
||||
return selectPHI(I);
|
||||
default:
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
|
@ -31,9 +31,9 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
|
||||
MachineRegisterInfo *MRI,
|
||||
Register LaneMaskRegAttrs);
|
||||
static Register
|
||||
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
|
||||
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs);
|
||||
|
||||
namespace {
|
||||
|
||||
@ -78,7 +78,7 @@ public:
|
||||
MachineBasicBlock::iterator I, const DebugLoc &DL,
|
||||
Register DstReg, Register PrevReg,
|
||||
Register CurReg) override;
|
||||
void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
|
||||
void constrainAsLaneMask(Incoming &In) override;
|
||||
|
||||
bool lowerCopiesFromI1();
|
||||
bool lowerCopiesToI1();
|
||||
@ -304,7 +304,8 @@ public:
|
||||
/// blocks, so that the SSA updater doesn't have to search all the way to the
|
||||
/// function entry.
|
||||
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
|
||||
MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
|
||||
ArrayRef<Incoming> Incomings = {}) {
|
||||
assert(LoopLevel < CommonDominators.size());
|
||||
|
||||
@ -411,14 +412,15 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
|
||||
return new SILowerI1Copies();
|
||||
}
|
||||
|
||||
Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
|
||||
Register LaneMaskRegAttrs) {
|
||||
return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
|
||||
Register
|
||||
llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
|
||||
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs) {
|
||||
return MRI->createVirtualRegister(LaneMaskRegAttrs);
|
||||
}
|
||||
|
||||
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
|
||||
MachineRegisterInfo *MRI,
|
||||
Register LaneMaskRegAttrs) {
|
||||
static Register
|
||||
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
|
||||
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs) {
|
||||
MachineFunction &MF = *MBB->getParent();
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
@ -619,7 +621,7 @@ bool PhiLoweringHelper::lowerPhis() {
|
||||
for (auto &Incoming : Incomings) {
|
||||
MachineBasicBlock &IMBB = *Incoming.Block;
|
||||
if (PIA.isSource(IMBB)) {
|
||||
constrainIncomingRegisterTakenAsIs(Incoming);
|
||||
constrainAsLaneMask(Incoming);
|
||||
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
|
||||
} else {
|
||||
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
|
||||
@ -911,6 +913,4 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
|
||||
}
|
||||
}
|
||||
|
||||
void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
|
||||
return;
|
||||
}
|
||||
void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
|
||||
|
@ -31,7 +31,8 @@ struct Incoming {
|
||||
: Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
|
||||
};
|
||||
|
||||
Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);
|
||||
Register createLaneMaskReg(MachineRegisterInfo *MRI,
|
||||
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs);
|
||||
|
||||
class PhiLoweringHelper {
|
||||
public:
|
||||
@ -47,7 +48,7 @@ protected:
|
||||
MachineRegisterInfo *MRI = nullptr;
|
||||
const GCNSubtarget *ST = nullptr;
|
||||
const SIInstrInfo *TII = nullptr;
|
||||
Register LaneMaskRegAttrs;
|
||||
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs;
|
||||
|
||||
#ifndef NDEBUG
|
||||
DenseSet<Register> PhiRegisters;
|
||||
@ -68,7 +69,7 @@ public:
|
||||
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
|
||||
|
||||
void initializeLaneMaskRegisterAttributes(Register LaneMask) {
|
||||
LaneMaskRegAttrs = LaneMask;
|
||||
LaneMaskRegAttrs = MRI->getVRegAttrs(LaneMask);
|
||||
}
|
||||
|
||||
bool isLaneMaskReg(Register Reg) const {
|
||||
@ -91,7 +92,7 @@ public:
|
||||
MachineBasicBlock::iterator I,
|
||||
const DebugLoc &DL, Register DstReg,
|
||||
Register PrevReg, Register CurReg) = 0;
|
||||
virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
|
||||
virtual void constrainAsLaneMask(Incoming &In) = 0;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; REQUIRES: do-not-run-me
|
||||
|
||||
; Divergent phis that don't require lowering using lane mask merging
|
||||
|
||||
|
@ -1,5 +1,9 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# Test is updated but copies between S1-register-with-reg-class and
|
||||
# register-with-reg-class-no-LLT fail machine verification
|
||||
# REQUIRES: do-not-run-me-with-machine-verifier
|
||||
|
||||
--- |
|
||||
define void @divergent_i1_phi_uniform_branch() {ret void}
|
||||
@ -46,7 +50,7 @@ body: |
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.4(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
@ -126,6 +130,7 @@ body: |
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
|
||||
; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2
|
||||
@ -136,12 +141,17 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C4]], [[C3]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
@ -191,30 +201,37 @@ body: |
|
||||
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -279,25 +296,28 @@ body: |
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %39(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI1]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C3]]
|
||||
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]]
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
|
||||
; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -320,22 +340,26 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C8]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C8]]
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C9]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[XOR1]](s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
|
||||
; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI4]](s1), [[C11]], [[C10]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY10]](s1), [[C11]], [[C10]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -468,7 +492,7 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI %32(s1), %bb.4, [[C5]](s1), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = G_PHI %32(s1), %bb.4, [[C5]](s1), %bb.0
|
||||
; GFX10-NEXT: G_BRCOND [[PHI1]](s1), %bb.5
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; REQUIRES: do-not-run-me
|
||||
|
||||
; This file contains various tests that have divergent i1s used outside of
|
||||
; the loop. These are lane masks is sgpr and need to have correct value in
|
||||
|
@ -1,5 +1,9 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# Test is updated but copies between S1-register-with-reg-class and
|
||||
# register-with-reg-class-no-LLT fail machine verification
|
||||
# REQUIRES: do-not-run-me-with-machine-verifier
|
||||
|
||||
---
|
||||
name: divergent_i1_phi_used_outside_loop
|
||||
@ -19,30 +23,49 @@ body: |
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %36(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.0, %24(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]]
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
|
||||
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[PHI2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_2]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -103,42 +126,67 @@ body: |
|
||||
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C1]](s1), %bb.0, %13(s1), %bb.3
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI2]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %41, %bb.3
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_OR_B32_]](s1), %bb.0, %27(s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PHI1]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PHI3]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C2]]
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.2, [[PHI2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.2
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]]
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY12]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[PHI1]], [[C3]](s64)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[PHI3]], [[C3]](s64)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[PHI]], [[C4]]
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[PHI2]], [[C4]]
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[ADD]](s32), [[C5]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[PHI2]](s1), %bb.3
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
|
||||
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI4]](s1), [[C7]], [[C6]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY14]](s1), [[C7]], [[C6]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -211,30 +259,37 @@ body: |
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %24(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -298,6 +353,7 @@ body: |
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -305,29 +361,49 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.8, [[C1]](s1), %bb.0
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[C1]](s1), %bb.0, %39(s1), %bb.8
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY6]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF3]](s1), %bb.1, %72(s1), %bb.7
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.1, %61, %bb.7
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.1, %48, %bb.7
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]]
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
|
||||
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -335,9 +411,17 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.7(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
|
||||
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C7]]
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI2]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]]
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.7
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.5:
|
||||
@ -353,22 +437,32 @@ body: |
|
||||
; GFX10-NEXT: bb.7:
|
||||
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[C6]](s1), %bb.4, [[C3]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s1) = G_PHI [[ICMP2]](s1), %bb.4, [[C3]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
|
||||
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]]
|
||||
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
|
||||
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C9]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI5]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]]
|
||||
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.8
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.8:
|
||||
; GFX10-NEXT: successors: %bb.2(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.7
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.7
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
|
||||
; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.7
|
||||
; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
|
||||
; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY19]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
bb.0:
|
||||
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
@ -479,35 +573,40 @@ body: |
|
||||
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]](s1), %bb.0, %38(s1), %bb.6
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI1]]
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.4(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
|
||||
; GFX10-NEXT: G_STORE [[PHI1]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: G_STORE [[PHI2]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI1]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]]
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -515,27 +614,35 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.6(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C4]]
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C3]](s1), %bb.5, [[C2]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[C2]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.7
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.7:
|
||||
; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.6
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[ICMP]](s1), %bb.6
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.6
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.6
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
|
||||
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY11]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.8
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.8:
|
||||
@ -648,47 +755,75 @@ body: |
|
||||
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C1]](s1), %bb.0, %14(s1), %bb.3
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI2]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %53(s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %42, %bb.3
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[C1]](s1), %bb.0, %32(s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
|
||||
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.2, [[PHI2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[PHI2]], %bb.1, [[DEF2]](s1), %bb.2
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]]
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[PHI3]]
|
||||
; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]]
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1)
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C4]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI1]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[FREEZE]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI4]](s1), [[C6]], [[C5]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C6]], [[C5]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
@ -770,20 +905,39 @@ body: |
|
||||
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.0, %56, %bb.5
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %43, %bb.5
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -798,6 +952,7 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.5(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
|
||||
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
|
||||
@ -805,9 +960,16 @@ body: |
|
||||
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
|
||||
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C7]]
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]]
|
||||
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C8]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]]
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
@ -817,21 +979,27 @@ body: |
|
||||
; GFX10-NEXT: bb.5:
|
||||
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]]
|
||||
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]]
|
||||
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI4]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[PHI3]](s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32)
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
bb.0:
|
||||
successors: %bb.1(0x80000000)
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; REQUIRES: do-not-run-me
|
||||
|
||||
; Simples case, if - then, that requires lane mask merging,
|
||||
; %phi lane mask will hold %val_A at %A. Lanes that are active in %B
|
||||
|
@ -1,5 +1,9 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# Test is updated but copies between S1-register-with-reg-class and
|
||||
# register-with-reg-class-no-LLT fail machine verification
|
||||
# REQUIRES: do-not-run-me-with-machine-verifier
|
||||
|
||||
---
|
||||
name: divergent_i1_phi_if_then
|
||||
@ -18,6 +22,7 @@ body: |
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]]
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -28,13 +33,18 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C4]], [[C3]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
@ -85,6 +95,7 @@ body: |
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C]]
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
@ -93,7 +104,9 @@ body: |
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %10(s1), %bb.3, [[DEF]](s1), %bb.0
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[COPY5]](s1)
|
||||
; GFX10-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_ELSE [[SI_IF]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -102,6 +115,10 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C1]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
@ -109,14 +126,19 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.1
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[PHI]](s1), %bb.1, [[ICMP1]](s1), %bb.2
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[COPY5]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI1]](s1), [[C3]], [[C4]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C3]], [[C4]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
@ -183,20 +205,28 @@ body: |
|
||||
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %35, %bb.3
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -210,23 +240,28 @@ body: |
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C5]]
|
||||
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C5]]
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C5]]
|
||||
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C6]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C6]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.2, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.2, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.2, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY8]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1(0x80000000)
|
||||
@ -308,20 +343,28 @@ body: |
|
||||
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %48, %bb.3
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -329,6 +372,7 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
|
||||
@ -341,10 +385,11 @@ body: |
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %34(s1), %bb.5, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -358,21 +403,30 @@ body: |
|
||||
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]]
|
||||
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C8]]
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]]
|
||||
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C9]]
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]]
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.5:
|
||||
; GFX10-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s1) = G_PHI [[ICMP2]](s1), %bb.4, [[C4]](s1), %bb.2
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[C4]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]]
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[COPY12]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32)
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1(0x80000000)
|
||||
@ -481,20 +535,28 @@ body: |
|
||||
; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %61, %bb.3
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -502,6 +564,7 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
|
||||
@ -514,10 +577,11 @@ body: |
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %37(s1), %bb.5, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]]
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.8
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -525,6 +589,7 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
|
||||
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64)
|
||||
@ -537,9 +602,14 @@ body: |
|
||||
; GFX10-NEXT: bb.5:
|
||||
; GFX10-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s1) = G_PHI %47(s1), %bb.7, [[C4]](s1), %bb.2
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[C4]](s1), %bb.2, %71(s1), %bb.7
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]]
|
||||
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
@ -552,21 +622,30 @@ body: |
|
||||
; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[C11]]
|
||||
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD3]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C11]]
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C11]]
|
||||
; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
|
||||
; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C12]]
|
||||
; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C12]]
|
||||
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP3]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.7:
|
||||
; GFX10-NEXT: successors: %bb.5(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.6, [[DEF]](s32), %bb.4
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s1) = G_PHI [[ICMP3]](s1), %bb.6, [[C7]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[C7]](s1), %bb.4, [[S_OR_B32_2]](s1), %bb.6
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.6, [[DEF]](s32), %bb.4
|
||||
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]]
|
||||
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[COPY17]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.8:
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
|
||||
; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32)
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1(0x80000000)
|
||||
@ -696,20 +775,39 @@ body: |
|
||||
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.0, %56, %bb.5
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %43, %bb.5
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -724,6 +822,7 @@ body: |
|
||||
; GFX10-NEXT: successors: %bb.5(0x80000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
|
||||
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
|
||||
@ -731,9 +830,16 @@ body: |
|
||||
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
|
||||
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C7]]
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]]
|
||||
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C8]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]]
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
@ -743,21 +849,27 @@ body: |
|
||||
; GFX10-NEXT: bb.5:
|
||||
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]]
|
||||
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]]
|
||||
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI4]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[PHI3]](s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32)
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
|
||||
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
bb.0:
|
||||
successors: %bb.1(0x80000000)
|
||||
@ -857,7 +969,11 @@ body: |
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: G_BR %bb.7
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
@ -870,18 +986,27 @@ body: |
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, %14(s1), %bb.7
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI %67(s1), %bb.6, %70, %bb.7
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI %49(s1), %bb.6, %48(s1), %bb.7
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI %35(s1), %bb.6, %34(s1), %bb.7
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]]
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY9]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI1]](s1), %17(s32)
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY8]](s1), %17(s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.3:
|
||||
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.3(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1, %19(s32), %bb.3
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1, %19(s32), %bb.3
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32)
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT1]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -890,18 +1015,28 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INTRINSIC_CONVERGENT]](s32)
|
||||
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
|
||||
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32)
|
||||
; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF5:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %63(s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.5:
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP2]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[COPY3]], [[COPY2]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY14]](s1), [[COPY3]], [[COPY2]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
|
||||
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
@ -911,17 +1046,42 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT1]](s32), %bb.3
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
|
||||
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %56(s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[DEF6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.7:
|
||||
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4, [[PHI6]](s32), %bb.2, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INTRINSIC_CONVERGENT]](s32), %bb.2, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PHI]](s1), %bb.2, [[C2]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[DEF3]](s1), %bb.0, [[PHI7]], %bb.2, [[S_OR_B32_1]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.0, [[PHI1]], %bb.2, [[DEF5]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI9:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, [[PHI2]], %bb.2, [[DEF4]](s1), %bb.4
|
||||
; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4, [[PHI10]](s32), %bb.2, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI11:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INTRINSIC_CONVERGENT]](s32), %bb.2, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]]
|
||||
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]]
|
||||
; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]]
|
||||
; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[PHI9]]
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI8]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY20]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_5]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_6:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY19]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_6:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_6]](s1), [[S_AND_B32_6]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_6]](s1)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY17]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.1
|
||||
bb.0:
|
||||
successors: %bb.7(0x80000000)
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; REQUIRES: do-not-run-me
|
||||
|
||||
define void @temporal_divergent_i1_phi(float %val, ptr %addr) {
|
||||
; GFX10-LABEL: temporal_divergent_i1_phi:
|
||||
|
@ -1,5 +1,9 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
# Test is updated but copies between S1-register-with-reg-class and
|
||||
# register-with-reg-class-no-LLT fail machine verification
|
||||
# REQUIRES: do-not-run-me-with-machine-verifier
|
||||
|
||||
---
|
||||
name: temporal_divergent_i1_phi
|
||||
@ -17,30 +21,37 @@ body: |
|
||||
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[PHI2]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -97,30 +108,37 @@ body: |
|
||||
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
|
||||
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.2:
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
|
||||
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
|
||||
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
|
||||
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
|
||||
; GFX10-NEXT: SI_RETURN
|
||||
bb.0:
|
||||
@ -183,20 +201,31 @@ body: |
|
||||
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
|
||||
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.1:
|
||||
; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
|
||||
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF2]](s1), %bb.0, %53(s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %42, %bb.5
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
|
||||
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
|
||||
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
|
||||
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
|
||||
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
|
||||
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
|
||||
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
|
||||
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
|
||||
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
@ -218,8 +247,12 @@ body: |
|
||||
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
|
||||
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C7]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[COPY2]]
|
||||
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C7]]
|
||||
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]]
|
||||
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: G_BR %bb.5
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.4:
|
||||
@ -229,20 +262,25 @@ body: |
|
||||
; GFX10-NEXT: bb.5:
|
||||
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI4]](s1), [[PHI]](s32)
|
||||
; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
|
||||
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
|
||||
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]]
|
||||
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI2]](s32)
|
||||
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
|
||||
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
|
||||
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.6
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: bb.6:
|
||||
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[PHI3]](s1), %bb.5
|
||||
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
|
||||
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_2]](s1)
|
||||
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
|
||||
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX10-NEXT: G_BR %bb.2
|
||||
bb.0:
|
||||
successors: %bb.1(0x80000000)
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
define void @temporal_divergent_i32(float %val, ptr %addr) {
|
||||
; GFX10-LABEL: temporal_divergent_i32:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: temporal_divergent_i32
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
; REQUIRES: do-not-run-me
|
||||
|
||||
; Make sure the branch targets are correct after lowering llvm.amdgcn.if
|
||||
|
||||
|
@ -422,7 +422,7 @@ body: |
|
||||
G_BR %bb.2
|
||||
|
||||
bb.2:
|
||||
%6:sgpr(s32) = PHI %0(s32), %bb.0, %5(s32), %bb.1
|
||||
%6:sgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1
|
||||
$sgpr0 = COPY %6(s32)
|
||||
S_SETPC_B64 undef $sgpr30_sgpr31
|
||||
|
||||
@ -476,7 +476,7 @@ body: |
|
||||
G_BR %bb.2
|
||||
|
||||
bb.2:
|
||||
%6:vgpr(s32) = PHI %0(s32), %bb.0, %5(s32), %bb.1
|
||||
%6:vgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1
|
||||
$vgpr0 = COPY %6
|
||||
S_SETPC_B64 undef $sgpr30_sgpr31
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
|
||||
; REQUIRES: do-not-run-me
|
||||
|
||||
define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
|
||||
; GFX7-LABEL: v_div_fmas_f32:
|
||||
|
Loading…
x
Reference in New Issue
Block a user