llvm-project/llvm/lib/IR/StructuralHash.cpp
Aiden Grossman 09f4c6967f [NFC][IR] Update StructuralHash comment
This patch updates a comment at the top of the structural hash
implementation that was made invalid by
64da0be1fc06ee2199bd27c980736986e0eccd9d. The class is now used for more
than just validating pass return status and is now the sole
implementation rather than being copied from somewhere.
2023-08-29 19:41:14 -07:00

170 lines
5.9 KiB
C++

//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/StructuralHash.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
using namespace llvm;
namespace {
// Basic hashing mechanism to detect structural change to the IR, used to verify
// pass return status consistency with actual change. In addition to being used
// by the MergeFunctions pass.
class StructuralHashImpl {
uint64_t Hash;
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
// This will produce different values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
template <typename T> void hashArbitaryType(const T &V) {
hash(hash_combine(V));
}
void hashType(Type *ValueType) {
hash(ValueType->getTypeID());
if (ValueType->isIntegerTy())
hash(ValueType->getIntegerBitWidth());
}
public:
StructuralHashImpl() : Hash(4) {}
void updateOperand(Value *Operand) {
hashType(Operand->getType());
// The cases enumerated below are not exhaustive and are only aimed to
// get decent coverage over the function.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
hashArbitaryType(ConstInt->getValue());
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
hashArbitaryType(ConstFP->getValue());
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
hash(Arg->getArgNo());
} else if (Function *Func = dyn_cast<Function>(Operand)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
hashArbitaryType(Func->getName());
}
}
void updateInstruction(const Instruction &Inst, bool DetailedHash) {
hash(Inst.getOpcode());
if (!DetailedHash)
return;
hashType(Inst.getType());
// Handle additional properties of specific instructions that cause
// semantic differences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
hash(ComparisonInstruction->getPredicate());
for (const auto &Op : Inst.operands())
updateOperand(Op);
}
// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy FunctionComparator::compare() uses to compare functions by
// walking the BBs in depth first order and comparing each instruction in
// sequence. Because this hash currently does not look at the operands, it is
// insensitive to things such as the target of calls and the constants used in
// the function, which makes it useful when possibly merging functions which
// are the same modulo constants and call targets.
//
// Note that different users of StructuralHash will want different behavior
// out of it (i.e., MergeFunctions will want something different from PM
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
void update(const Function &F, bool DetailedHash) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
hash(0x62642d6b6b2d6b72); // Function header
hash(F.isVarArg());
hash(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
// Walk the blocks in the same order as
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
// function "structure." (BB and opcode sequence)
BBs.push_back(&F.getEntryBlock());
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
hash(45798);
for (auto &Inst : *BB)
updateInstruction(Inst, DetailedHash);
const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
continue;
BBs.push_back(Term->getSuccessor(i));
}
}
}
void update(const GlobalVariable &GV) {
// Declarations and used/compiler.used don't affect analyses.
// Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
hash(23456); // Global header
hash(GV.getValueType()->getTypeID());
}
void update(const Module &M, bool DetailedHash) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
update(F, DetailedHash);
}
uint64_t getHash() const { return Hash; }
};
} // namespace
// Computes the structural hash of a single function. DetailedHash is
// forwarded to StructuralHashImpl::update() and selects whether more than the
// opcode of each instruction is hashed.
IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
  StructuralHashImpl Hasher;
  Hasher.update(F, DetailedHash);
  const IRHash Result = Hasher.getHash();
  return Result;
}
// Computes the structural hash of a whole module. DetailedHash is forwarded
// to StructuralHashImpl::update() and applies to the hashing of the module's
// functions.
IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
  StructuralHashImpl Hasher;
  Hasher.update(M, DetailedHash);
  const IRHash Result = Hasher.getHash();
  return Result;
}