mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-03 20:16:05 +00:00

Change the intersect used by the anticipated dataflow to ignore Unknown when anticipating. This effectively allows speculative VXRM writes, because a VXRM write may be emitted even when there are branches on which VXRM is not needed. This change matters because VXRM writes cause pipeline flushes on some micro-architectures, so it makes sense to allow more aggressive hoisting even if it causes some degradation on the slow path. An example is this code: ``` typedef unsigned char uint8_t; __attribute__ ((noipa)) void foo (uint8_t *dst, int i_dst_stride, uint8_t *src1, int i_src1_stride, uint8_t *src2, int i_src2_stride, int i_width, int i_height ) { for( int y = 0; y < i_height; y++ ) { for( int x = 0; x < i_width; x++ ) dst[x] = ( src1[x] + src2[x] + 1 ) >> 1; dst += i_dst_stride; src1 += i_src1_stride; src2 += i_src2_stride; } } ``` With this patch, the VXRM write for the code above is hoisted out of the outer loop.
505 lines
15 KiB
C++
505 lines
15 KiB
C++
//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass inserts writes to the VXRM CSR as needed by vector instructions.
// Each instruction that uses VXRM carries an operand that contains its required
// VXRM value. This pass tries to optimize placement to avoid redundant writes
// to VXRM.
//
// This is done using 2 dataflow algorithms. The first is a forward data flow
// to calculate where a VXRM value is available. The second is a backwards
// dataflow to determine where a VXRM value is anticipated.
//
// Finally, we use the results of these two dataflows to insert VXRM writes
// where a value is anticipated, but not available.
//
// FIXME: This pass does not split critical edges, so there can still be some
// redundancy.
//
// FIXME: If we are willing to have writes that aren't always needed, we could
// reduce the number of VXRM writes in some cases.
//===----------------------------------------------------------------------===//
|
|
|
|
#include "MCTargetDesc/RISCVBaseInfo.h"
|
|
#include "RISCV.h"
|
|
#include "RISCVSubtarget.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include <queue>
|
|
|
|
using namespace llvm;
|
|
|
|
// Tag used by LLVM_DEBUG output in this file and as the pass argument name
// passed to INITIALIZE_PASS.
#define DEBUG_TYPE "riscv-insert-write-vxrm"
// Human-readable pass name returned by getPassName().
#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
|
|
|
|
namespace {
|
|
|
|
class VXRMInfo {
|
|
uint8_t VXRMImm = 0;
|
|
|
|
enum : uint8_t {
|
|
Uninitialized,
|
|
Static,
|
|
Unknown,
|
|
} State = Uninitialized;
|
|
|
|
public:
|
|
VXRMInfo() {}
|
|
|
|
static VXRMInfo getUnknown() {
|
|
VXRMInfo Info;
|
|
Info.setUnknown();
|
|
return Info;
|
|
}
|
|
|
|
bool isValid() const { return State != Uninitialized; }
|
|
void setUnknown() { State = Unknown; }
|
|
bool isUnknown() const { return State == Unknown; }
|
|
|
|
bool isStatic() const { return State == Static; }
|
|
|
|
void setVXRMImm(unsigned Imm) {
|
|
assert(Imm <= 3 && "Unexpected VXRM value");
|
|
VXRMImm = Imm;
|
|
State = Static;
|
|
}
|
|
unsigned getVXRMImm() const {
|
|
assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
|
|
return VXRMImm;
|
|
}
|
|
|
|
bool operator==(const VXRMInfo &Other) const {
|
|
// Uninitialized is only equal to another Uninitialized.
|
|
if (State != Other.State)
|
|
return false;
|
|
|
|
if (isStatic())
|
|
return VXRMImm == Other.VXRMImm;
|
|
|
|
assert((isValid() || isUnknown()) && "Unexpected state");
|
|
return true;
|
|
}
|
|
|
|
bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
|
|
|
|
// Calculate the VXRMInfo visible to a block assuming this and Other are
|
|
// both predecessors.
|
|
VXRMInfo intersect(const VXRMInfo &Other) const {
|
|
// If the new value isn't valid, ignore it.
|
|
if (!Other.isValid())
|
|
return *this;
|
|
|
|
// If this value isn't valid, this must be the first predecessor, use it.
|
|
if (!isValid())
|
|
return Other;
|
|
|
|
// If either is unknown, the result is unknown.
|
|
if (isUnknown() || Other.isUnknown())
|
|
return VXRMInfo::getUnknown();
|
|
|
|
// If we have an exact match, return this.
|
|
if (*this == Other)
|
|
return *this;
|
|
|
|
// Otherwise the result is unknown.
|
|
return VXRMInfo::getUnknown();
|
|
}
|
|
|
|
// Calculate the VXRMInfo visible to a block assuming this and Other
|
|
// are both predecessors. To allow speculatively running WriteVXRM
|
|
// we will ignore Unknowns if one of this and Other have valid
|
|
// WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some
|
|
// uarchs and moving it outside loops is very important for some
|
|
// workloads.
|
|
VXRMInfo intersectAnticipated(const VXRMInfo &Other) const {
|
|
// If the new value isn't valid, ignore it.
|
|
if (!Other.isValid())
|
|
return *this;
|
|
|
|
// If this value isn't valid, this must be the first predecessor, use it.
|
|
if (!isValid())
|
|
return Other;
|
|
|
|
// If either is unknown, the result is the other one.
|
|
if (isUnknown())
|
|
return Other;
|
|
if (Other.isUnknown())
|
|
return *this;
|
|
|
|
// If we have an exact match, return this.
|
|
if (*this == Other)
|
|
return *this;
|
|
|
|
// Otherwise the result is unknown.
|
|
return VXRMInfo::getUnknown();
|
|
}
|
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
|
/// Support for debugging, callable in GDB: V->dump()
|
|
LLVM_DUMP_METHOD void dump() const {
|
|
print(dbgs());
|
|
dbgs() << "\n";
|
|
}
|
|
|
|
void print(raw_ostream &OS) const {
|
|
OS << '{';
|
|
if (!isValid())
|
|
OS << "Uninitialized";
|
|
else if (isUnknown())
|
|
OS << "Unknown";
|
|
else
|
|
OS << getVXRMImm();
|
|
OS << '}';
|
|
}
|
|
#endif
|
|
};
|
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
|
LLVM_ATTRIBUTE_USED
|
|
inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
|
|
V.print(OS);
|
|
return OS;
|
|
}
|
|
#endif
|
|
|
|
struct BlockData {
|
|
// Indicates if the block uses VXRM. Uninitialized means no use.
|
|
VXRMInfo VXRMUse;
|
|
|
|
// Indicates the VXRM output from the block. Unitialized means transparent.
|
|
VXRMInfo VXRMOut;
|
|
|
|
// Keeps track of the available VXRM value at the start of the basic bloc.
|
|
VXRMInfo AvailableIn;
|
|
|
|
// Keeps track of the available VXRM value at the end of the basic block.
|
|
VXRMInfo AvailableOut;
|
|
|
|
// Keeps track of what VXRM is anticipated at the start of the basic block.
|
|
VXRMInfo AnticipatedIn;
|
|
|
|
// Keeps track of what VXRM is anticipated at the end of the basic block.
|
|
VXRMInfo AnticipatedOut;
|
|
|
|
// Keeps track of whether the block is already in the queue.
|
|
bool InQueue;
|
|
|
|
BlockData() = default;
|
|
};
|
|
|
|
class RISCVInsertWriteVXRM : public MachineFunctionPass {
|
|
const TargetInstrInfo *TII;
|
|
|
|
std::vector<BlockData> BlockInfo;
|
|
std::queue<const MachineBasicBlock *> WorkList;
|
|
|
|
public:
|
|
static char ID;
|
|
|
|
RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.setPreservesCFG();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
|
|
StringRef getPassName() const override {
|
|
return RISCV_INSERT_WRITE_VXRM_NAME;
|
|
}
|
|
|
|
private:
|
|
bool computeVXRMChanges(const MachineBasicBlock &MBB);
|
|
void computeAvailable(const MachineBasicBlock &MBB);
|
|
void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB);
|
|
void emitWriteVXRM(MachineBasicBlock &MBB);
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
char RISCVInsertWriteVXRM::ID = 0;
|
|
|
|
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
|
|
false, false)
|
|
|
|
static bool ignoresVXRM(const MachineInstr &MI) {
|
|
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
|
|
default:
|
|
return false;
|
|
case RISCV::VNCLIP_WI:
|
|
case RISCV::VNCLIPU_WI:
|
|
return MI.getOperand(3).getImm() == 0;
|
|
}
|
|
}
|
|
|
|
bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
|
|
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
|
|
|
|
bool NeedVXRMWrite = false;
|
|
for (const MachineInstr &MI : MBB) {
|
|
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
|
|
if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
|
|
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
|
|
|
|
if (!BBInfo.VXRMUse.isValid())
|
|
BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
|
|
|
|
BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
|
|
NeedVXRMWrite = true;
|
|
continue;
|
|
}
|
|
|
|
if (MI.isCall() || MI.isInlineAsm() ||
|
|
MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) {
|
|
if (!BBInfo.VXRMUse.isValid())
|
|
BBInfo.VXRMUse.setUnknown();
|
|
|
|
BBInfo.VXRMOut.setUnknown();
|
|
}
|
|
}
|
|
|
|
return NeedVXRMWrite;
|
|
}
|
|
|
|
void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
|
|
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
|
|
|
|
BBInfo.InQueue = false;
|
|
|
|
VXRMInfo Available;
|
|
if (MBB.pred_empty()) {
|
|
Available.setUnknown();
|
|
} else {
|
|
for (const MachineBasicBlock *P : MBB.predecessors())
|
|
Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
|
|
}
|
|
|
|
// If we don't have any valid available info, wait until we do.
|
|
if (!Available.isValid())
|
|
return;
|
|
|
|
if (Available != BBInfo.AvailableIn) {
|
|
BBInfo.AvailableIn = Available;
|
|
LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
|
|
<< " changed to " << BBInfo.AvailableIn << "\n");
|
|
}
|
|
|
|
if (BBInfo.VXRMOut.isValid())
|
|
Available = BBInfo.VXRMOut;
|
|
|
|
if (Available == BBInfo.AvailableOut)
|
|
return;
|
|
|
|
BBInfo.AvailableOut = Available;
|
|
LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
|
|
<< " changed to " << BBInfo.AvailableOut << "\n");
|
|
|
|
// Add the successors to the work list so that we can propagate.
|
|
for (MachineBasicBlock *S : MBB.successors()) {
|
|
if (!BlockInfo[S->getNumber()].InQueue) {
|
|
BlockInfo[S->getNumber()].InQueue = true;
|
|
WorkList.push(S);
|
|
}
|
|
}
|
|
}
|
|
|
|
void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) {
|
|
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
|
|
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
|
|
|
|
BBInfo.InQueue = false;
|
|
|
|
VXRMInfo Anticipated;
|
|
if (MBB.succ_empty()) {
|
|
Anticipated.setUnknown();
|
|
} else {
|
|
for (const MachineBasicBlock *S : MBB.successors())
|
|
if (ST.hasVXRMPipelineFlush())
|
|
Anticipated =
|
|
Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn);
|
|
else
|
|
Anticipated =
|
|
Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
|
|
}
|
|
|
|
// If we don't have any valid anticipated info, wait until we do.
|
|
if (!Anticipated.isValid())
|
|
return;
|
|
|
|
if (Anticipated != BBInfo.AnticipatedOut) {
|
|
BBInfo.AnticipatedOut = Anticipated;
|
|
LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
|
|
<< " changed to " << BBInfo.AnticipatedOut << "\n");
|
|
}
|
|
|
|
// If this block reads VXRM, copy it.
|
|
if (BBInfo.VXRMUse.isValid())
|
|
Anticipated = BBInfo.VXRMUse;
|
|
|
|
if (Anticipated == BBInfo.AnticipatedIn)
|
|
return;
|
|
|
|
BBInfo.AnticipatedIn = Anticipated;
|
|
LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
|
|
<< " changed to " << BBInfo.AnticipatedIn << "\n");
|
|
|
|
// Add the predecessors to the work list so that we can propagate.
|
|
for (MachineBasicBlock *P : MBB.predecessors()) {
|
|
if (!BlockInfo[P->getNumber()].InQueue) {
|
|
BlockInfo[P->getNumber()].InQueue = true;
|
|
WorkList.push(P);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Phase 4 worker: insert the WriteVXRMImm instructions needed in \p MBB and
/// add an implicit VXRM use to every instruction that requires a VXRM value.
///
/// A write is emitted (a) before an instruction whose required immediate does
/// not match the VXRM state tracked so far, and (b) before the first
/// terminator when the block's anticipated exit value is Static and differs
/// from the tracked state, or when an entry write is still pending.
void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
  const BlockData &Data = BlockInfo[MBB.getNumber()];
  const VXRMInfo &WantedIn = Data.AnticipatedIn;
  const VXRMInfo &WantedOut = Data.AnticipatedOut;

  // VXRM state tracked while walking the block.
  VXRMInfo Current = Data.AvailableIn;

  // Flag to indicate we need to insert a VXRM write. We want to delay it as
  // late as possible in this block.
  bool PendingInsert = false;

  // Insert VXRM write if anticipated and not available.
  if (WantedIn.isStatic()) {
    const unsigned WantedImm = WantedIn.getVXRMImm();
    auto Provides = [WantedImm](const VXRMInfo &Info) {
      return Info.isStatic() && Info.getVXRMImm() == WantedImm;
    };

    if (MBB.isEntryBlock()) {
      // If this is the entry block and the value is anticipated, insert.
      PendingInsert = true;
    } else {
      // Search for any predecessors that wouldn't satisfy our requirement and
      // insert a write VXRM if needed.
      // NOTE: If one predecessor is able to provide the requirement, but
      // another isn't, it means we have a critical edge. The better placement
      // would be to split the critical edge.
      for (MachineBasicBlock *Pred : MBB.predecessors()) {
        const BlockData &PredData = BlockInfo[Pred->getNumber()];

        // Fine if the value is available out of the predecessor, or if the
        // predecessor anticipates it for all its successors (then a write to
        // VXRM would have already occurred before this block is executed).
        if (Provides(PredData.AvailableOut) ||
            Provides(PredData.AnticipatedOut))
          continue;

        PendingInsert = true;
        break;
      }
    }

    Current = WantedIn;
  }

  for (MachineInstr &MI : MBB) {
    const int OpIdx = RISCVII::getVXRMOpNum(MI.getDesc());

    // Instructions that do not require a VXRM value can only invalidate the
    // tracked state.
    if (OpIdx < 0 || ignoresVXRM(MI)) {
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr))
        Current.setUnknown();
      continue;
    }

    const unsigned RequiredImm = MI.getOperand(OpIdx).getImm();
    const bool AlreadySet =
        Current.isStatic() && Current.getVXRMImm() == RequiredImm;

    if (PendingInsert || !AlreadySet) {
      // A pending entry write must agree with the first user in the block.
      assert((!PendingInsert || AlreadySet) &&
             "Pending VXRM insertion mismatch");
      LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
          .addImm(RequiredImm);
      PendingInsert = false;
    }

    // Make the dependency on the VXRM register explicit.
    MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
                                            /*IsImp*/ true));
    Current.setVXRMImm(RequiredImm);
  }

  // If all our successors anticipate a value, do the insert.
  // NOTE: It's possible that not all predecessors of our successor provide the
  // correct value. This can occur on critical edges. If we don't split the
  // critical edge we'll also have a write vxrm in the successor that is
  // redundant with this one.
  const bool ExitNeedsWrite =
      WantedOut.isStatic() &&
      (!Current.isStatic() ||
       Current.getVXRMImm() != WantedOut.getVXRMImm());
  if (!PendingInsert && !ExitNeedsWrite)
    return;

  assert((!PendingInsert ||
          (Current.isStatic() && WantedOut.isStatic() &&
           Current.getVXRMImm() == WantedOut.getVXRMImm())) &&
         "Pending VXRM insertion mismatch");
  LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
                    << " changing to " << WantedOut << "\n");
  BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
          TII->get(RISCV::WriteVXRMImm))
      .addImm(WantedOut.getVXRMImm());
}
|
|
|
|
/// Pass entry point. Runs the four phases (collect, available, anticipated,
/// emit) over \p MF.
///
/// Returns false when the subtarget has no vector instructions or when no
/// instruction in the function requires a VXRM value; returns true otherwise.
bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  TII = ST.getInstrInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  // Phase 1 - collect block information.
  bool AnyVXRMUser = false;
  for (const MachineBasicBlock &MBB : MF) {
    if (computeVXRMChanges(MBB))
      AnyVXRMUser = true;
  }

  // Nothing in this function needs VXRM: leave it untouched.
  if (!AnyVXRMUser) {
    BlockInfo.clear();
    return false;
  }

  // Seeds the work list with a block and marks it as queued.
  auto Enqueue = [this](const MachineBasicBlock &MBB) {
    BlockInfo[MBB.getNumber()].InQueue = true;
    WorkList.push(&MBB);
  };

  // Phase 2 - Compute available VXRM using a forward walk.
  for (const MachineBasicBlock &MBB : MF)
    Enqueue(MBB);
  while (!WorkList.empty()) {
    const MachineBasicBlock *Next = WorkList.front();
    WorkList.pop();
    computeAvailable(*Next);
  }

  // Phase 3 - Compute anticipated VXRM using a backwards walk.
  for (const MachineBasicBlock &MBB : llvm::reverse(MF))
    Enqueue(MBB);
  while (!WorkList.empty()) {
    const MachineBasicBlock *Next = WorkList.front();
    WorkList.pop();
    computeAnticipated(MF, *Next);
  }

  // Phase 4 - Emit VXRM writes at the earliest place possible.
  for (MachineBasicBlock &MBB : MF)
    emitWriteVXRM(MBB);

  // Drop per-function state so the next function starts from an empty vector.
  BlockInfo.clear();

  return true;
}
|
|
|
|
/// Factory for the pass: returns a newly allocated RISCVInsertWriteVXRM
/// instance as a FunctionPass pointer.
FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
  FunctionPass *Pass = new RISCVInsertWriteVXRM();
  return Pass;
}
|