mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-30 01:36:05 +00:00
167 lines
5.7 KiB
C++
167 lines
5.7 KiB
C++
//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/IR/StructuralHash.h"
|
|
#include "llvm/ADT/Hashing.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
#include "llvm/IR/InstrTypes.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/Module.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
// Basic hashing mechanism to detect structural change to the IR, used to verify
// that a pass's reported modification status is consistent with the actual
// change. It is also used by the MergeFunctions pass.
|
|
|
|
class StructuralHashImpl {
|
|
uint64_t Hash = 4;
|
|
|
|
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
|
|
|
|
// This will produce different values on 32-bit and 64-bit systens as
|
|
// hash_combine returns a size_t. However, this is only used for
|
|
// detailed hashing which, in-tree, only needs to distinguish between
|
|
// differences in functions.
|
|
template <typename T> void hashArbitaryType(const T &V) {
|
|
hash(hash_combine(V));
|
|
}
|
|
|
|
void hashType(Type *ValueType) {
|
|
hash(ValueType->getTypeID());
|
|
if (ValueType->isIntegerTy())
|
|
hash(ValueType->getIntegerBitWidth());
|
|
}
|
|
|
|
public:
|
|
StructuralHashImpl() = default;
|
|
|
|
void updateOperand(Value *Operand) {
|
|
hashType(Operand->getType());
|
|
|
|
// The cases enumerated below are not exhaustive and are only aimed to
|
|
// get decent coverage over the function.
|
|
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
|
|
hashArbitaryType(ConstInt->getValue());
|
|
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
|
|
hashArbitaryType(ConstFP->getValue());
|
|
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
|
|
hash(Arg->getArgNo());
|
|
} else if (Function *Func = dyn_cast<Function>(Operand)) {
|
|
// Hashing the name will be deterministic as LLVM's hashing infrastructure
|
|
// has explicit support for hashing strings and will not simply hash
|
|
// the pointer.
|
|
hashArbitaryType(Func->getName());
|
|
}
|
|
}
|
|
|
|
void updateInstruction(const Instruction &Inst, bool DetailedHash) {
|
|
hash(Inst.getOpcode());
|
|
|
|
if (!DetailedHash)
|
|
return;
|
|
|
|
hashType(Inst.getType());
|
|
|
|
// Handle additional properties of specific instructions that cause
|
|
// semantic differences in the IR.
|
|
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
|
|
hash(ComparisonInstruction->getPredicate());
|
|
|
|
for (const auto &Op : Inst.operands())
|
|
updateOperand(Op);
|
|
}
|
|
|
|
// A function hash is calculated by considering only the number of arguments
|
|
// and whether a function is varargs, the order of basic blocks (given by the
|
|
// successors of each basic block in depth first order), and the order of
|
|
// opcodes of each instruction within each of these basic blocks. This mirrors
|
|
// the strategy FunctionComparator::compare() uses to compare functions by
|
|
// walking the BBs in depth first order and comparing each instruction in
|
|
// sequence. Because this hash currently does not look at the operands, it is
|
|
// insensitive to things such as the target of calls and the constants used in
|
|
// the function, which makes it useful when possibly merging functions which
|
|
// are the same modulo constants and call targets.
|
|
//
|
|
// Note that different users of StructuralHash will want different behavior
|
|
// out of it (i.e., MergeFunctions will want something different from PM
|
|
// expensive checks for pass modification status). When modifying this
|
|
// function, most changes should be gated behind an option and enabled
|
|
// selectively.
|
|
void update(const Function &F, bool DetailedHash) {
|
|
// Declarations don't affect analyses.
|
|
if (F.isDeclaration())
|
|
return;
|
|
|
|
hash(0x62642d6b6b2d6b72); // Function header
|
|
|
|
hash(F.isVarArg());
|
|
hash(F.arg_size());
|
|
|
|
SmallVector<const BasicBlock *, 8> BBs;
|
|
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
|
|
|
|
// Walk the blocks in the same order as
|
|
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
|
|
// function "structure." (BB and opcode sequence)
|
|
BBs.push_back(&F.getEntryBlock());
|
|
VisitedBBs.insert(BBs[0]);
|
|
while (!BBs.empty()) {
|
|
const BasicBlock *BB = BBs.pop_back_val();
|
|
|
|
// This random value acts as a block header, as otherwise the partition of
|
|
// opcodes into BBs wouldn't affect the hash, only the order of the
|
|
// opcodes
|
|
hash(45798);
|
|
for (auto &Inst : *BB)
|
|
updateInstruction(Inst, DetailedHash);
|
|
|
|
for (const BasicBlock *Succ : successors(BB))
|
|
if (VisitedBBs.insert(Succ).second)
|
|
BBs.push_back(Succ);
|
|
}
|
|
}
|
|
|
|
void update(const GlobalVariable &GV) {
|
|
// Declarations and used/compiler.used don't affect analyses.
|
|
// Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
|
|
// we ignore anything with the `.llvm` prefix
|
|
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
|
|
return;
|
|
hash(23456); // Global header
|
|
hash(GV.getValueType()->getTypeID());
|
|
}
|
|
|
|
void update(const Module &M, bool DetailedHash) {
|
|
for (const GlobalVariable &GV : M.globals())
|
|
update(GV);
|
|
for (const Function &F : M)
|
|
update(F, DetailedHash);
|
|
}
|
|
|
|
uint64_t getHash() const { return Hash; }
|
|
};
|
|
|
|
} // namespace
|
|
|
|
/// Compute the structural hash of the single function \p F. \p DetailedHash
/// is forwarded unchanged to the hashing implementation.
IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
  StructuralHashImpl Hasher;
  Hasher.update(F, DetailedHash);
  return Hasher.getHash();
}
|
|
|
|
/// Compute the structural hash of the whole module \p M. \p DetailedHash is
/// forwarded unchanged to the hashing implementation.
IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
  StructuralHashImpl Hasher;
  Hasher.update(M, DetailedHash);
  return Hasher.getHash();
}
|