llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp

//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass inserts writes to the VXRM CSR as needed by vector instructions.
// Each instruction that uses VXRM carries an operand that contains its required
// VXRM value. This pass tries to optimize placement to avoid redundant writes
// to VXRM.
//
// This is done using 2 dataflow algorithms. The first is a forward data flow
// to calculate where a VXRM value is available. The second is a backwards
// dataflow to determine where a VXRM value is anticipated.
//
// Finally, we use the results of these two dataflows to insert VXRM writes
// where a value is anticipated, but not available.
//
// FIXME: This pass does not split critical edges, so there can still be some
// redundancy.
//
// FIXME: If we are willing to have writes that aren't always needed, we could
// reduce the number of VXRM writes in some cases.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "riscv-insert-write-vxrm"
#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"

namespace {

class VXRMInfo {
  uint8_t VXRMImm = 0;

  enum : uint8_t {
    Uninitialized,
    Static,
    Unknown,
  } State = Uninitialized;

public:
  VXRMInfo() {}

  static VXRMInfo getUnknown() {
    VXRMInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  bool isStatic() const { return State == Static; }

  void setVXRMImm(unsigned Imm) {
    assert(Imm <= 3 && "Unexpected VXRM value");
    VXRMImm = Imm;
    State = Static;
  }
  unsigned getVXRMImm() const {
    assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
    return VXRMImm;
  }

  bool operator==(const VXRMInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (State != Other.State)
      return false;

    if (isStatic())
      return VXRMImm == Other.VXRMImm;

    assert((isValid() || isUnknown()) && "Unexpected state");
    return true;
  }

  bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }

  // Calculate the VXRMInfo visible to a block assuming this and Other are
  // both predecessors.
  VXRMInfo intersect(const VXRMInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VXRMInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Otherwise the result is unknown.
    return VXRMInfo::getUnknown();
  }

  // Calculate the VXRMInfo visible to a block assuming this and Other
  // are both predecessors. To allow speculatively running WriteVXRM
  // we will ignore Unknowns if one of this and Other have valid
  // WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some
  // uarchs and moving it outside loops is very important for some
  // workloads.
  VXRMInfo intersectAnticipated(const VXRMInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is the other one.
    if (isUnknown())
      return Other;
    if (Other.isUnknown())
      return *this;

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Otherwise the result is unknown.
    return VXRMInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  void print(raw_ostream &OS) const {
    OS << '{';
    if (!isValid())
      OS << "Uninitialized";
    else if (isUnknown())
      OS << "Unknown";
    else
      OS << getVXRMImm();
    OS << '}';
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // Indicates if the block uses VXRM. Uninitialized means no use.
  VXRMInfo VXRMUse;

  // Indicates the VXRM output from the block. Unitialized means transparent.
  VXRMInfo VXRMOut;

  // Keeps track of the available VXRM value at the start of the basic bloc.
  VXRMInfo AvailableIn;

  // Keeps track of the available VXRM value at the end of the basic block.
  VXRMInfo AvailableOut;

  // Keeps track of what VXRM is anticipated at the start of the basic block.
  VXRMInfo AnticipatedIn;

  // Keeps track of what VXRM is anticipated at the end of the basic block.
  VXRMInfo AnticipatedOut;

  // Keeps track of whether the block is already in the queue.
  bool InQueue;

  BlockData() = default;
};

class RISCVInsertWriteVXRM : public MachineFunctionPass {
  const TargetInstrInfo *TII;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return RISCV_INSERT_WRITE_VXRM_NAME;
  }

private:
  bool computeVXRMChanges(const MachineBasicBlock &MBB);
  void computeAvailable(const MachineBasicBlock &MBB);
  void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB);
  void emitWriteVXRM(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertWriteVXRM::ID = 0;

INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
                false, false)

static bool ignoresVXRM(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VNCLIP_WI:
  case RISCV::VNCLIPU_WI:
    return MI.getOperand(3).getImm() == 0;
  }
}

bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  bool NeedVXRMWrite = false;
  for (const MachineInstr &MI : MBB) {
    int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
    if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
      unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();

      if (!BBInfo.VXRMUse.isValid())
        BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);

      BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
      NeedVXRMWrite = true;
      continue;
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) {
      if (!BBInfo.VXRMUse.isValid())
        BBInfo.VXRMUse.setUnknown();

      BBInfo.VXRMOut.setUnknown();
    }
  }

  return NeedVXRMWrite;
}

void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VXRMInfo Available;
  if (MBB.pred_empty()) {
    Available.setUnknown();
  } else {
    for (const MachineBasicBlock *P : MBB.predecessors())
      Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
  }

  // If we don't have any valid available info, wait until we do.
  if (!Available.isValid())
    return;

  if (Available != BBInfo.AvailableIn) {
    BBInfo.AvailableIn = Available;
    LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
                      << " changed to " << BBInfo.AvailableIn << "\n");
  }

  if (BBInfo.VXRMOut.isValid())
    Available = BBInfo.VXRMOut;

  if (Available == BBInfo.AvailableOut)
    return;

  BBInfo.AvailableOut = Available;
  LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.AvailableOut << "\n");

  // Add the successors to the work list so that we can propagate.
  for (MachineBasicBlock *S : MBB.successors()) {
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
  }
}

void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  BBInfo.InQueue = false;

  VXRMInfo Anticipated;
  if (MBB.succ_empty()) {
    Anticipated.setUnknown();
  } else {
    for (const MachineBasicBlock *S : MBB.successors())
      if (ST.hasVXRMPipelineFlush())
        Anticipated =
          Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn);
      else
        Anticipated =
          Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
  }

  // If we don't have any valid anticipated info, wait until we do.
  if (!Anticipated.isValid())
    return;

  if (Anticipated != BBInfo.AnticipatedOut) {
    BBInfo.AnticipatedOut = Anticipated;
    LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
                      << " changed to " << BBInfo.AnticipatedOut << "\n");
  }

  // If this block reads VXRM, copy it.
  if (BBInfo.VXRMUse.isValid())
    Anticipated = BBInfo.VXRMUse;

  if (Anticipated == BBInfo.AnticipatedIn)
    return;

  BBInfo.AnticipatedIn = Anticipated;
  LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.AnticipatedIn << "\n");

  // Add the predecessors to the work list so that we can propagate.
  for (MachineBasicBlock *P : MBB.predecessors()) {
    if (!BlockInfo[P->getNumber()].InQueue) {
      BlockInfo[P->getNumber()].InQueue = true;
      WorkList.push(P);
    }
  }
}

void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
  const BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  VXRMInfo Info = BBInfo.AvailableIn;

  // Flag to indicates we need to insert a VXRM write. We want to delay it as
  // late as possible in this block.
  bool PendingInsert = false;

  // Insert VXRM write if anticipated and not available.
  if (BBInfo.AnticipatedIn.isStatic()) {
    // If this is the entry block and the value is anticipated, insert.
    if (MBB.isEntryBlock()) {
      PendingInsert = true;
    } else {
      // Search for any predecessors that wouldn't satisfy our requirement and
      // insert a write VXRM if needed.
      // NOTE: If one predecessor is able to provide the requirement, but
      // another isn't, it means we have a critical edge. The better placement
      // would be to split the critical edge.
      for (MachineBasicBlock *P : MBB.predecessors()) {
        const BlockData &PInfo = BlockInfo[P->getNumber()];
        // If it's available out of the predecessor, then we're ok.
        if (PInfo.AvailableOut.isStatic() &&
            PInfo.AvailableOut.getVXRMImm() ==
                BBInfo.AnticipatedIn.getVXRMImm())
          continue;
        // If the predecessor anticipates this value for all its succesors,
        // then a write to VXRM would have already occured before this block is
        // executed.
        if (PInfo.AnticipatedOut.isStatic() &&
            PInfo.AnticipatedOut.getVXRMImm() ==
                BBInfo.AnticipatedIn.getVXRMImm())
          continue;
        PendingInsert = true;
        break;
      }
    }

    Info = BBInfo.AnticipatedIn;
  }

  for (MachineInstr &MI : MBB) {
    int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
    if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
      unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();

      if (PendingInsert || !Info.isStatic() ||
          Info.getVXRMImm() != NewVXRMImm) {
        assert((!PendingInsert ||
                (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
               "Pending VXRM insertion mismatch");
        LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
            .addImm(NewVXRMImm);
        PendingInsert = false;
      }

      MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
                                              /*IsImp*/ true));
      Info.setVXRMImm(NewVXRMImm);
      continue;
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr))
      Info.setUnknown();
  }

  // If all our successors anticipate a value, do the insert.
  // NOTE: It's possible that not all predecessors of our successor provide the
  // correct value. This can occur on critical edges. If we don't split the
  // critical edge we'll also have a write vxrm in the succesor that is
  // redundant with this one.
  if (PendingInsert ||
      (BBInfo.AnticipatedOut.isStatic() &&
       (!Info.isStatic() ||
        Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
    assert((!PendingInsert ||
            (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
             Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
           "Pending VXRM insertion mismatch");
    LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
                      << " changing to " << BBInfo.AnticipatedOut << "\n");
    BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
            TII->get(RISCV::WriteVXRMImm))
        .addImm(BBInfo.AnticipatedOut.getVXRMImm());
  }
}

bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  TII = ST.getInstrInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  // Phase 1 - collect block information.
  bool NeedVXRMChange = false;
  for (const MachineBasicBlock &MBB : MF)
    NeedVXRMChange |= computeVXRMChanges(MBB);

  if (!NeedVXRMChange) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - Compute available VXRM using a forward walk.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeAvailable(MBB);
  }

  // Phase 3 - Compute anticipated VXRM using a backwards walk.
  for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeAnticipated(MF, MBB);
  }

  // Phase 4 - Emit VXRM writes at the earliest place possible.
  for (MachineBasicBlock &MBB : MF)
    emitWriteVXRM(MBB);

  BlockInfo.clear();

  return true;
}

FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
  return new RISCVInsertWriteVXRM();
}