mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 11:46:08 +00:00
353 lines
12 KiB
C++
353 lines
12 KiB
C++
//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/IR/StructuralHash.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
#include "llvm/IR/InstrTypes.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/Module.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
// Basic hashing mechanism to detect structural change to the IR, used to verify
|
|
// pass return status consistency with actual change. In addition to being used
|
|
// by the MergeFunctions pass.
|
|
|
|
class StructuralHashImpl {
|
|
stable_hash Hash = 4;
|
|
|
|
bool DetailedHash;
|
|
|
|
// This random value acts as a block header, as otherwise the partition of
|
|
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
|
|
static constexpr stable_hash BlockHeaderHash = 45798;
|
|
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
|
|
static constexpr stable_hash GlobalHeaderHash = 23456;
|
|
|
|
/// IgnoreOp is a function that returns true if the operand should be ignored.
|
|
IgnoreOperandFunc IgnoreOp = nullptr;
|
|
/// A mapping from instruction indices to instruction pointers.
|
|
/// The index represents the position of an instruction based on the order in
|
|
/// which it is first encountered.
|
|
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
|
|
/// A mapping from pairs of instruction indices and operand indices
|
|
/// to the hashes of the operands.
|
|
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
|
|
|
|
/// Assign a unique ID to each Value in the order they are first seen.
|
|
DenseMap<const Value *, int> ValueToId;
|
|
|
|
static stable_hash hashType(Type *ValueType) {
|
|
SmallVector<stable_hash> Hashes;
|
|
Hashes.emplace_back(ValueType->getTypeID());
|
|
if (ValueType->isIntegerTy())
|
|
Hashes.emplace_back(ValueType->getIntegerBitWidth());
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
public:
|
|
StructuralHashImpl() = delete;
|
|
explicit StructuralHashImpl(bool DetailedHash,
|
|
IgnoreOperandFunc IgnoreOp = nullptr)
|
|
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
|
|
if (IgnoreOp) {
|
|
IndexInstruction = std::make_unique<IndexInstrMap>();
|
|
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
|
|
}
|
|
}
|
|
|
|
static stable_hash hashAPInt(const APInt &I) {
|
|
SmallVector<stable_hash> Hashes;
|
|
Hashes.emplace_back(I.getBitWidth());
|
|
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
|
|
Hashes.append(RawVals.begin(), RawVals.end());
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
static stable_hash hashAPFloat(const APFloat &F) {
|
|
return hashAPInt(F.bitcastToAPInt());
|
|
}
|
|
|
|
static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
|
|
if (!GVar.hasInitializer())
|
|
return hashGlobalValue(&GVar);
|
|
|
|
// Hash the contents of a string.
|
|
if (GVar.getName().starts_with(".str")) {
|
|
auto *C = GVar.getInitializer();
|
|
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
|
|
if (Seq->isString())
|
|
return stable_hash_name(Seq->getAsString());
|
|
}
|
|
|
|
// Hash structural contents of Objective-C metadata in specific sections.
|
|
// This can be extended to other metadata if needed.
|
|
static constexpr const char *SectionNames[] = {
|
|
"__cfstring", "__cstring", "__objc_classrefs",
|
|
"__objc_methname", "__objc_selrefs",
|
|
};
|
|
if (GVar.hasSection()) {
|
|
StringRef SectionName = GVar.getSection();
|
|
for (const char *Name : SectionNames)
|
|
if (SectionName.contains(Name))
|
|
return hashConstant(GVar.getInitializer());
|
|
}
|
|
|
|
return hashGlobalValue(&GVar);
|
|
}
|
|
|
|
static stable_hash hashGlobalValue(const GlobalValue *GV) {
|
|
if (!GV->hasName())
|
|
return 0;
|
|
return stable_hash_name(GV->getName());
|
|
}
|
|
|
|
// Compute a hash for a Constant. This function is logically similar to
|
|
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
|
|
// we're interested in computing a hash rather than comparing two Constants.
|
|
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
|
|
static stable_hash hashConstant(const Constant *C) {
|
|
SmallVector<stable_hash> Hashes;
|
|
|
|
Type *Ty = C->getType();
|
|
Hashes.emplace_back(hashType(Ty));
|
|
|
|
if (C->isNullValue()) {
|
|
Hashes.emplace_back(static_cast<stable_hash>('N'));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
|
|
Hashes.emplace_back(hashGlobalVariable(*GVar));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
if (auto *G = dyn_cast<GlobalValue>(C)) {
|
|
Hashes.emplace_back(hashGlobalValue(G));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
|
|
if (Seq->isString()) {
|
|
Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
}
|
|
|
|
switch (C->getValueID()) {
|
|
case Value::ConstantIntVal: {
|
|
const APInt &Int = cast<ConstantInt>(C)->getValue();
|
|
Hashes.emplace_back(hashAPInt(Int));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
case Value::ConstantFPVal: {
|
|
const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
|
|
Hashes.emplace_back(hashAPFloat(APF));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
case Value::ConstantArrayVal:
|
|
case Value::ConstantStructVal:
|
|
case Value::ConstantVectorVal:
|
|
case Value::ConstantExprVal: {
|
|
for (const auto &Op : C->operands()) {
|
|
auto H = hashConstant(cast<Constant>(Op));
|
|
Hashes.emplace_back(H);
|
|
}
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
case Value::BlockAddressVal: {
|
|
const BlockAddress *BA = cast<BlockAddress>(C);
|
|
auto H = hashGlobalValue(BA->getFunction());
|
|
Hashes.emplace_back(H);
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
case Value::DSOLocalEquivalentVal: {
|
|
const auto *Equiv = cast<DSOLocalEquivalent>(C);
|
|
auto H = hashGlobalValue(Equiv->getGlobalValue());
|
|
Hashes.emplace_back(H);
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
default:
|
|
// Skip other types of constants for simplicity.
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
}
|
|
|
|
stable_hash hashValue(Value *V) {
|
|
// Check constant and return its hash.
|
|
Constant *C = dyn_cast<Constant>(V);
|
|
if (C)
|
|
return hashConstant(C);
|
|
|
|
// Hash argument number.
|
|
SmallVector<stable_hash> Hashes;
|
|
if (Argument *Arg = dyn_cast<Argument>(V))
|
|
Hashes.emplace_back(Arg->getArgNo());
|
|
|
|
// Get an index (an insertion order) for the non-constant value.
|
|
auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
|
|
Hashes.emplace_back(It->second);
|
|
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
stable_hash hashOperand(Value *Operand) {
|
|
SmallVector<stable_hash> Hashes;
|
|
Hashes.emplace_back(hashType(Operand->getType()));
|
|
Hashes.emplace_back(hashValue(Operand));
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
stable_hash hashInstruction(const Instruction &Inst) {
|
|
SmallVector<stable_hash> Hashes;
|
|
Hashes.emplace_back(Inst.getOpcode());
|
|
|
|
if (!DetailedHash)
|
|
return stable_hash_combine(Hashes);
|
|
|
|
Hashes.emplace_back(hashType(Inst.getType()));
|
|
|
|
// Handle additional properties of specific instructions that cause
|
|
// semantic differences in the IR.
|
|
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
|
|
Hashes.emplace_back(ComparisonInstruction->getPredicate());
|
|
|
|
unsigned InstIdx = 0;
|
|
if (IndexInstruction) {
|
|
InstIdx = IndexInstruction->size();
|
|
IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
|
|
}
|
|
|
|
for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
|
|
auto OpndHash = hashOperand(Op);
|
|
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
|
|
assert(IndexOperandHashMap);
|
|
IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
|
|
} else
|
|
Hashes.emplace_back(OpndHash);
|
|
}
|
|
|
|
return stable_hash_combine(Hashes);
|
|
}
|
|
|
|
// A function hash is calculated by considering only the number of arguments
|
|
// and whether a function is varargs, the order of basic blocks (given by the
|
|
// successors of each basic block in depth first order), and the order of
|
|
// opcodes of each instruction within each of these basic blocks. This mirrors
|
|
// the strategy FunctionComparator::compare() uses to compare functions by
|
|
// walking the BBs in depth first order and comparing each instruction in
|
|
// sequence. Because this hash currently does not look at the operands, it is
|
|
// insensitive to things such as the target of calls and the constants used in
|
|
// the function, which makes it useful when possibly merging functions which
|
|
// are the same modulo constants and call targets.
|
|
//
|
|
// Note that different users of StructuralHash will want different behavior
|
|
// out of it (i.e., MergeFunctions will want something different from PM
|
|
// expensive checks for pass modification status). When modifying this
|
|
// function, most changes should be gated behind an option and enabled
|
|
// selectively.
|
|
void update(const Function &F) {
|
|
// Declarations don't affect analyses.
|
|
if (F.isDeclaration())
|
|
return;
|
|
|
|
SmallVector<stable_hash> Hashes;
|
|
Hashes.emplace_back(Hash);
|
|
Hashes.emplace_back(FunctionHeaderHash);
|
|
|
|
Hashes.emplace_back(F.isVarArg());
|
|
Hashes.emplace_back(F.arg_size());
|
|
|
|
SmallVector<const BasicBlock *, 8> BBs;
|
|
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
|
|
|
|
// Walk the blocks in the same order as
|
|
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
|
|
// function "structure." (BB and opcode sequence)
|
|
BBs.push_back(&F.getEntryBlock());
|
|
VisitedBBs.insert(BBs[0]);
|
|
while (!BBs.empty()) {
|
|
const BasicBlock *BB = BBs.pop_back_val();
|
|
|
|
Hashes.emplace_back(BlockHeaderHash);
|
|
for (auto &Inst : *BB)
|
|
Hashes.emplace_back(hashInstruction(Inst));
|
|
|
|
for (const BasicBlock *Succ : successors(BB))
|
|
if (VisitedBBs.insert(Succ).second)
|
|
BBs.push_back(Succ);
|
|
}
|
|
|
|
// Update the combined hash in place.
|
|
Hash = stable_hash_combine(Hashes);
|
|
}
|
|
|
|
void update(const GlobalVariable &GV) {
|
|
// Declarations and used/compiler.used don't affect analyses.
|
|
// Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
|
|
// we ignore anything with the `.llvm` prefix
|
|
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
|
|
return;
|
|
SmallVector<stable_hash> Hashes;
|
|
Hashes.emplace_back(Hash);
|
|
Hashes.emplace_back(GlobalHeaderHash);
|
|
Hashes.emplace_back(GV.getValueType()->getTypeID());
|
|
|
|
// Update the combined hash in place.
|
|
Hash = stable_hash_combine(Hashes);
|
|
}
|
|
|
|
void update(const Module &M) {
|
|
for (const GlobalVariable &GV : M.globals())
|
|
update(GV);
|
|
for (const Function &F : M)
|
|
update(F);
|
|
}
|
|
|
|
uint64_t getHash() const { return Hash; }
|
|
|
|
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
|
|
return std::move(IndexInstruction);
|
|
}
|
|
|
|
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
|
|
return std::move(IndexOperandHashMap);
|
|
}
|
|
};
|
|
|
|
} // namespace
|
|
|
|
stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
|
|
StructuralHashImpl H(DetailedHash);
|
|
H.update(F);
|
|
return H.getHash();
|
|
}
|
|
|
|
stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
|
|
return StructuralHashImpl::hashGlobalVariable(GVar);
|
|
}
|
|
|
|
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
|
|
StructuralHashImpl H(DetailedHash);
|
|
H.update(M);
|
|
return H.getHash();
|
|
}
|
|
|
|
FunctionHashInfo
|
|
llvm::StructuralHashWithDifferences(const Function &F,
|
|
IgnoreOperandFunc IgnoreOp) {
|
|
StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
|
|
H.update(F);
|
|
return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
|
|
H.getIndexPairOpndHashMap());
|
|
}
|