[KeyInstr][Clang] Add ApplyAtomGroup

This is a scoped helper similar to ApplyDebugLocation that creates a new source
atom group which instructions can be added to.

A source atom is a source construct that is "interesting" for debug stepping
purposes. We use an atom group number to track the instruction(s) that implement
the functionality for the atom, plus backup instructions/source locations.

---

This patch is part of a stack that teaches Clang to generate Key Instructions
metadata for C and C++.

The Key Instructions project is introduced in the RFC linked below, which
includes a "quick summary" section at the top that adds context for this PR:
https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668

The feature is only functional in LLVM if LLVM is built with the CMake flag
LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS. Eventually that flag will be removed.

The Clang-side work is demoed here:
https://github.com/llvm/llvm-project/pull/130943
This commit is contained in:
Orlando Cazalet-Hyams 2025-04-01 11:59:24 +01:00
parent 7132dd30a1
commit 856f99ea6b
3 changed files with 182 additions and 1 deletions

View File

@ -43,6 +43,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
@ -52,6 +53,7 @@
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SHA256.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdint>
#include <optional>
using namespace clang;
using namespace clang::CodeGen;
@ -119,6 +121,114 @@ CGDebugInfo::~CGDebugInfo() {
"Region stack mismatch, stack not empty!");
}
/// Attribute \p I to a source atom by rewriting its debug location.
///
/// \param I     Instruction to annotate. It is left untouched when it has no
///              debug location or when that location's line is zero.
/// \param Group Atom group to add \p I to. Zero is treated as "no group" and
///              makes the call a no-op.
/// \param Rank  Precedence of \p I within the group (lower nonzero value is
///              higher precedence). Values above 7 are clamped to 7.
///
/// If \p I already carries an atom group with a nonzero rank that is strictly
/// lower than \p Rank, the existing group and rank are kept instead.
void CGDebugInfo::addInstSourceAtomMetadata(llvm::Instruction *I,
                                            uint64_t Group, uint8_t Rank) {
  // Bind the location once; it is consulted several times below.
  const llvm::DebugLoc &DL = I->getDebugLoc();
  if (!DL || Group == 0 || !DL->getLine())
    return;

  // Saturate the 3-bit rank.
  Rank = std::min<uint8_t>(Rank, 7);

  // Each instruction can only be attributed to one source atom (a limitation of
  // the implementation). If this instruction is already part of a source atom,
  // pick the group in which it has highest precedence (lowest rank).
  const llvm::DILocation *Existing = DL.get();
  if (Existing->getAtomGroup() && Existing->getAtomRank() &&
      Existing->getAtomRank() < Rank) {
    Group = Existing->getAtomGroup();
    Rank = Existing->getAtomRank();
  }

  // Update the function-local watermark so we don't reuse this number for
  // another atom.
  KeyInstructionsInfo.HighestEmittedAtom =
      std::max(Group, KeyInstructionsInfo.HighestEmittedAtom);

  // Build the replacement location (same position/scope, new group and rank)
  // before touching I: DL refers to I's current location.
  llvm::DILocation *NewDL = llvm::DILocation::get(
      I->getContext(), DL.getLine(), DL.getCol(), DL.getScope(),
      DL.getInlinedAt(), DL.isImplicitCode(), Group, Rank);
  I->setDebugLoc(NewDL);
}
/// Add \p KeyInstruction, and optionally the instruction producing \p Backup,
/// to the current source atom group.
///
/// Does nothing when the DebugKeyInstructions codegen option is off or when no
/// atom group is current (CurrentAtom is zero).
///
/// \param KeyInstruction Instruction added to the group with rank 1.
/// \param Backup         May be null or a non-instruction value, in which case
///                       only \p KeyInstruction is added. Otherwise it is added
///                       with rank 2, and if it is a cast the instructions
///                       feeding the cast chain are added with ranks 3, 4, ...
void CGDebugInfo::addInstToCurrentSourceAtom(llvm::Instruction *KeyInstruction,
                                             llvm::Value *Backup) {
  if (!CGM.getCodeGenOpts().DebugKeyInstructions)
    return;

  const uint64_t Group = KeyInstructionsInfo.CurrentAtom;
  if (!Group)
    return;

  addInstSourceAtomMetadata(KeyInstruction, Group, /*Rank=*/1);

  auto *BackupI = llvm::dyn_cast_or_null<llvm::Instruction>(Backup);
  if (!BackupI)
    return;

  // Add the backup instruction to the group.
  addInstSourceAtomMetadata(BackupI, Group, /*Rank=*/2);

  // Look through chains of casts too, as they're probably going to evaporate.
  // FIXME: And other nops like zero length geps?
  // FIXME: Should use Cast->isNoopCast()?
  //
  // The rank is held at MaxRank once reached instead of being incremented
  // unconditionally: an 8-bit counter would eventually wrap to 0 on a very
  // long cast chain, and 0 is not a valid precedence. addInstSourceAtomMetadata
  // clamps to the same maximum, so shorter chains are unaffected.
  constexpr uint8_t MaxRank = 7;
  uint8_t Rank = 3;
  while (auto *Cast = dyn_cast<llvm::CastInst>(BackupI)) {
    BackupI = dyn_cast<llvm::Instruction>(Cast->getOperand(0));
    if (!BackupI)
      break;
    addInstSourceAtomMetadata(BackupI, Group, Rank);
    if (Rank < MaxRank)
      ++Rank;
  }
}
/// Add \p Ret (and optionally \p Backup) to the pending return-atom override
/// if one has been set, otherwise to a freshly created atom group.
/// A pending override is cleared once it has been used.
void CGDebugInfo::addRetToOverrideOrNewSourceAtom(llvm::ReturnInst *Ret,
                                                  llvm::Value *Backup) {
  const uint64_t Override = KeyInstructionsInfo.RetAtomOverride;
  if (!Override) {
    // No override pending: open a new group that lasts just long enough to
    // annotate the return.
    ApplyAtomGroup FreshGroup(this);
    addInstToCurrentSourceAtom(Ret, Backup);
    return;
  }

  // Temporarily make the override the current atom, then put the previous
  // current atom back and consume the override.
  const uint64_t SavedAtom = KeyInstructionsInfo.CurrentAtom;
  KeyInstructionsInfo.CurrentAtom = Override;
  addInstToCurrentSourceAtom(Ret, Backup);
  KeyInstructionsInfo.CurrentAtom = SavedAtom;
  KeyInstructionsInfo.RetAtomOverride = 0;
}
/// Record \p Group as the atom group to use for the next return instruction
/// handled by addRetToOverrideOrNewSourceAtom.
/// Asserts that no override is already pending.
void CGDebugInfo::setRetInstSourceAtomOverride(uint64_t Group) {
  assert(KeyInstructionsInfo.RetAtomOverride == 0 &&
         "Ret atom override already set and not yet consumed!");
  KeyInstructionsInfo.RetAtomOverride = Group;
}
/// Return the Key Instructions bookkeeping to its initial state.
void CGDebugInfo::completeFunction() {
  // Atom group numbers are function-local, so numbering starts again from 1
  // and no group or return override remains active.
  auto &Info = KeyInstructionsInfo;
  Info.CurrentAtom = 0;
  Info.RetAtomOverride = 0;
  Info.HighestEmittedAtom = 0;
  Info.NextAtom = 1;
}
/// Open a new atom group on \p DI: remember the group that was current, then
/// make the next unused group number current. A null \p DI makes this a no-op.
ApplyAtomGroup::ApplyAtomGroup(CGDebugInfo *DI) : DI(DI) {
  if (DI) {
    auto &Info = DI->KeyInstructionsInfo;
    OriginalAtom = Info.CurrentAtom;
    Info.CurrentAtom = Info.NextAtom;
    ++Info.NextAtom;
  }
}
/// Close the atom group: reinstate the group that was current when this
/// object was constructed. Does nothing if constructed with a null CGDebugInfo.
ApplyAtomGroup::~ApplyAtomGroup() {
  if (DI) {
    auto &Info = DI->KeyInstructionsInfo;
    // We may not have used the group number at all; if so, lower NextAtom to
    // just past the highest number that was actually emitted.
    const uint64_t FirstUnused = Info.HighestEmittedAtom + 1;
    if (FirstUnused < Info.NextAtom)
      Info.NextAtom = FirstUnused;
    Info.CurrentAtom = OriginalAtom;
  }
}
ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF,
SourceLocation TemporaryLocation)
: CGF(&CGF) {
@ -174,8 +284,15 @@ ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, llvm::DebugLoc Loc)
return;
}
OriginalLocation = CGF.Builder.getCurrentDebugLocation();
if (Loc)
if (Loc) {
// Key Instructions: drop the atom group and rank to avoid accidentally
// propagating it around.
if (Loc->getAtomGroup())
Loc = llvm::DILocation::get(Loc->getContext(), Loc.getLine(),
Loc->getColumn(), Loc->getScope(),
Loc->getInlinedAt(), Loc.isImplicitCode());
CGF.Builder.SetCurrentDebugLocation(std::move(Loc));
}
}
ApplyDebugLocation::~ApplyDebugLocation() {

View File

@ -58,6 +58,8 @@ class CGBlockInfo;
class CGDebugInfo {
friend class ApplyDebugLocation;
friend class SaveAndRestoreLocation;
friend class ApplyAtomGroup;
CodeGenModule &CGM;
const llvm::codegenoptions::DebugInfoKind DebugKind;
bool DebugTypeExtRefs;
@ -179,6 +181,17 @@ class CGDebugInfo {
/// The key is coroutine real parameters, value is DIVariable in LLVM IR.
Param2DILocTy ParamDbgMappings;
/// Key Instructions bookkeeping.
/// Source atoms are identified by a {AtomGroup, InlinedAt} pair, meaning
/// AtomGroup numbers can be repeated across different functions.
/// completeFunction() puts every field back to its initial value.
struct {
/// Next group number to hand out. ApplyAtomGroup's constructor makes this
/// value the current atom and then increments it.
uint64_t NextAtom = 1;
/// Largest group number attached to an instruction so far. ApplyAtomGroup's
/// destructor uses it to lower NextAtom when a handed-out number went unused.
uint64_t HighestEmittedAtom = 0;
/// Group that addInstToCurrentSourceAtom adds instructions to. Zero means no
/// group is active and nothing is annotated.
uint64_t CurrentAtom = 0;
/// Group reserved for the next return instruction, set by
/// setRetInstSourceAtomOverride and cleared by
/// addRetToOverrideOrNewSourceAtom once used. Zero means no override.
uint64_t RetAtomOverride = 0;
} KeyInstructionsInfo;
private:
/// Helper functions for getOrCreateType.
/// @{
/// Currently the checksum of an interface includes the number of
@ -636,7 +649,30 @@ public:
StringRef Category,
StringRef FailureMsg);
/// Reset internal state. This puts the Key Instructions atom group
/// bookkeeping back to its initial values, as atom group numbers are
/// function-local.
void completeFunction();
/// Add \p KeyInstruction and an optional \p Backup instruction to the
/// current atom group, created using ApplyAtomGroup.
/// Does nothing when the DebugKeyInstructions codegen option is off or when
/// there is no current atom group. \p Backup may be null or a value that is
/// not an instruction, in which case only \p KeyInstruction is added. If
/// \p Backup is a cast, the instructions feeding the chain of casts are added
/// as well, each with lower precedence than the last.
void addInstToCurrentSourceAtom(llvm::Instruction *KeyInstruction,
llvm::Value *Backup);
/// Add \p Ret and an optional \p Backup instruction to the
/// saved override used for some ret instructions if it exists, or a new atom.
/// A saved override is cleared once it has been used, so it applies to a
/// single call only.
void addRetToOverrideOrNewSourceAtom(llvm::ReturnInst *Ret,
llvm::Value *Backup);
/// Set an atom group override for use in addRetToOverrideOrNewSourceAtom.
/// Asserts that no override is already pending.
void setRetInstSourceAtomOverride(uint64_t Group);
private:
/// Amend \p I's DebugLoc with \p Group (its source atom group) and \p
/// Rank (lower nonzero rank is higher precedence). Does nothing if \p I
/// has no DebugLoc, and chooses the atom group in which the instruction
/// has the highest precedence if it's already in one.
/// Also does nothing if \p Group is zero or the DebugLoc's line is zero.
/// \p Rank values above 7 are clamped to 7.
void addInstSourceAtomMetadata(llvm::Instruction *I, uint64_t Group,
uint8_t Rank);
/// Emit call to llvm.dbg.declare for a variable declaration.
/// Returns a pointer to the DILocalVariable associated with the
/// llvm.dbg.declare, or nullptr otherwise.
@ -853,6 +889,20 @@ private:
}
};
/// A scoped helper to set the current source atom group for
/// CGDebugInfo::addInstToCurrentSourceAtom. A source atom is a source construct
/// that is "interesting" for debug stepping purposes. We use an atom group
/// number to track the instruction(s) that implement the functionality for the
/// atom, plus backup instructions/source locations.
///
/// Construction makes a new group current on the given CGDebugInfo and
/// destruction reinstates the previously current group. A null CGDebugInfo is
/// accepted and makes both operations no-ops.
class ApplyAtomGroup {
  /// Group that was current when this object was constructed.
  uint64_t OriginalAtom = 0;
  /// Debug info object whose bookkeeping is updated; may be null.
  CGDebugInfo *DI = nullptr;

public:
  ApplyAtomGroup(CGDebugInfo *DI);
  ~ApplyAtomGroup();

  // Not copyable: the destructor restores state on the CGDebugInfo, so a copy
  // would perform that restoration a second time.
  ApplyAtomGroup(const ApplyAtomGroup &) = delete;
  ApplyAtomGroup &operator=(const ApplyAtomGroup &) = delete;
};
/// A scoped helper to set the current debug location to the specified
/// location or preferred location of the specified Expr.
class ApplyDebugLocation {

View File

@ -1761,6 +1761,20 @@ public:
/// recently incremented counter.
uint64_t getCurrentProfileCount() { return PGO.getCurrentRegionCount(); }
/// Forward to CGDebugInfo::addInstToCurrentSourceAtom when debug info is
/// being generated; otherwise do nothing.
void addInstToCurrentSourceAtom(llvm::Instruction *KeyInstruction,
                                llvm::Value *Backup) {
  CGDebugInfo *DI = getDebugInfo();
  if (!DI)
    return;
  DI->addInstToCurrentSourceAtom(KeyInstruction, Backup);
}
/// Forward to CGDebugInfo::addRetToOverrideOrNewSourceAtom when debug info is
/// being generated; otherwise do nothing.
void addRetToOverrideOrNewSourceAtom(llvm::ReturnInst *Ret,
                                     llvm::Value *Backup) {
  CGDebugInfo *DI = getDebugInfo();
  if (!DI)
    return;
  DI->addRetToOverrideOrNewSourceAtom(Ret, Backup);
}
private:
/// SwitchInsn - This is nearest current switch instruction. It is null if
/// current context is not in a switch.