llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
Andrea Di Biagio d768d35515 [MC][X86] Correctly model additional operand latency caused by transfer delays from the integer to the floating point unit.
This patch adds a new ReadAdvance definition named ReadInt2Fpu.
ReadInt2Fpu allows x86 scheduling models to accurately describe delays caused by
data transfers from the integer unit to the floating point unit.
ReadInt2Fpu currently defaults to a delay of zero cycles (i.e. no delay) for all
x86 models excluding BtVer2. That means this patch is a functional change for
the Jaguar CPU model only.
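
A rough sketch of the sign convention, with illustrative names that are not
LLVM's actual API: a ReadAdvance of zero leaves the operand latency untouched,
while a negative value models an extra stall on the reading operand.

  // Illustrative only: ReadAdvanceCycles stands for the value a scheduling
  // model attaches to ReadInt2Fpu (0 by default, a negative value on BtVer2).
  static unsigned extraOperandDelay(int ReadAdvanceCycles) {
    return ReadAdvanceCycles < 0 ? -ReadAdvanceCycles : 0; // stall cycles
  }
  // extraOperandDelay(0)  == 0 -> no int-to-fpu delay (the default)
  // extraOperandDelay(-6) == 6 -> a Jaguar-style 6-cycle transfer delay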

TableGen definitions for the (V)PINSR* instructions have been updated to account
for the new ReadInt2Fpu read, which is mapped to the GPR input operand.
On Jaguar, int-to-fpu transfers are modeled as a +6cy delay. Before this patch,
that extra delay was added to the opcode latency. In practice, the insert opcode
itself only executes for 1cy; most of the observed latency is contributed by the
so-called operand latency. According to the AMD SOG for family 16h, (V)PINSR*
latency is defined by the expression f+1, where f is the forwarding delay from
the integer unit to the FPU.
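For example, with f = 6cy on Jaguar, the effective (V)PINSR* latency is
f + 1 = 6 + 1 = 7 cycles: one cycle for the insert itself plus the six-cycle
forwarding delay on the GPR operand.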

When printing instruction latency from MCA (see InstructionInfoView.cpp) and llc
(only when the -print-schedule flag is specified), we now need to account for any
extra forwarding delays. We do this by checking if scheduling classes declare
any negative ReadAdvance entries. Quoting a code comment in TargetSchedule.td:
"A negative advance effectively increases latency, which may be used for
cross-domain stalls". When computing the instruction latency for the purpose of
our scheduling tests, we now add any extra delay to the formula. This avoids
regressing existing codegen and mca schedule tests. It comes with the cost of an
extra (but very simple) hook in MCSchedModel.
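
Below is a minimal, self-contained C++ sketch of the idea behind that hook. The
type and function names are assumptions made for illustration, not LLVM's exact
declarations: the extra delay is the magnitude of the most negative ReadAdvance
declared by the scheduling class, and it is simply added to the computed
latency.

  #include <algorithm>
  #include <cstdlib>
  #include <vector>

  // Illustrative stand-in for a read-advance record.
  struct ReadAdvanceEntry {
    unsigned UseOperandIdx; // input operand the entry applies to
    int Cycles;             // >= 0: read early; < 0: cross-domain stall
  };

  // Worst-case forwarding delay implied by negative ReadAdvance entries.
  static unsigned
  forwardingDelayCycles(const std::vector<ReadAdvanceEntry> &Entries) {
    int Delay = 0;
    for (const ReadAdvanceEntry &E : Entries)
      if (E.Cycles < 0)
        Delay = std::min(Delay, E.Cycles);
    return static_cast<unsigned>(std::abs(Delay));
  }

  int main() {
    // Jaguar-style (V)PINSR* class: the GPR operand pays a 6-cycle stall.
    std::vector<ReadAdvanceEntry> PInsrClass = {{2, -6}};
    unsigned OpcodeLatency = 1; // the insert itself
    return (OpcodeLatency + forwardingDelayCycles(PInsrClass) == 7) ? 0 : 1;
  }

The operand index used above (2) is only a placeholder for whichever GPR input
operand the ReadInt2Fpu read is mapped to.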

Differential Revision: https://reviews.llvm.org/D57056

llvm-svn: 351965
2019-01-23 16:35:07 +00:00

130 lines
4.4 KiB
C++

//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file describes the general parts of a Subtarget.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

TargetSubtargetInfo::TargetSubtargetInfo(
    const Triple &TT, StringRef CPU, StringRef FS,
    ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
    const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
    const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
    const InstrStage *IS, const unsigned *OC, const unsigned *FP)
    : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
}

TargetSubtargetInfo::~TargetSubtargetInfo() = default;

bool TargetSubtargetInfo::enableAtomicExpand() const {
  return true;
}

bool TargetSubtargetInfo::enableIndirectBrExpand() const {
  return false;
}

bool TargetSubtargetInfo::enableMachineScheduler() const {
  return false;
}

bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
  return enableMachineScheduler();
}

bool TargetSubtargetInfo::enableRALocalReassignment(
    CodeGenOpt::Level OptLevel) const {
  return true;
}

bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
  return false;
}

bool TargetSubtargetInfo::enablePostRAScheduler() const {
  return getSchedModel().PostRAScheduler;
}

bool TargetSubtargetInfo::useAA() const {
  return false;
}

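// Build the " sched: [Latency:RThroughput]" comment string; "?" is emitted
// when the reciprocal throughput is unknown (reported as zero).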
static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
  static const char *SchedPrefix = " sched: [";
  std::string Comment;
  raw_string_ostream CS(Comment);
  if (RThroughput != 0.0)
    CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
       << "]";
  else
    CS << SchedPrefix << Latency << ":?]";
  CS.flush();
  return Comment;
}

/// Returns string representation of scheduler comment
std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
  if (MI.isPseudo() || MI.isTerminator())
    return std::string();
  // We don't cache TSchedModel because it depends on TargetInstrInfo
  // that could be changed during the compilation
  TargetSchedModel TSchedModel;
  TSchedModel.init(this);
  unsigned Latency = TSchedModel.computeInstrLatency(&MI);

  // Add extra latency due to forwarding delays.
  const MCSchedClassDesc &SCDesc = *TSchedModel.resolveSchedClass(&MI);
  Latency +=
      MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc));

  double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
  return createSchedInfoStr(Latency, RThroughput);
}

/// Returns string representation of scheduler comment
std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
  // We don't cache TSchedModel because it depends on TargetInstrInfo
  // that could be changed during the compilation
  TargetSchedModel TSchedModel;
  TSchedModel.init(this);
  unsigned Latency;
  if (TSchedModel.hasInstrSchedModel()) {
    Latency = TSchedModel.computeInstrLatency(MCI);

    // Add extra latency due to forwarding delays.
    const MCSchedModel &SM = *TSchedModel.getMCSchedModel();
    unsigned SClassID = getInstrInfo()->get(MCI.getOpcode()).getSchedClass();
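    // Resolve variant scheduling classes down to the concrete class that
    // applies to this instruction before reading its ReadAdvance entries.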
    while (SM.getSchedClassDesc(SClassID)->isVariant())
      SClassID = resolveVariantSchedClass(SClassID, &MCI, SM.ProcID);
    const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SClassID);
    Latency +=
        MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc));
  } else if (TSchedModel.hasInstrItineraries()) {
    auto *ItinData = TSchedModel.getInstrItineraries();
    Latency = ItinData->getStageLatency(
        getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
  } else
    return std::string();

  double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
  return createSchedInfoStr(Latency, RThroughput);
}

void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
}