//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/IR/StructuralHash.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" using namespace llvm; namespace { // Basic hashing mechanism to detect structural change to the IR, used to verify // pass return status consistency with actual change. In addition to being used // by the MergeFunctions pass. class StructuralHashImpl { stable_hash Hash = 4; bool DetailedHash; // This random value acts as a block header, as otherwise the partition of // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. static constexpr stable_hash BlockHeaderHash = 45798; static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; static constexpr stable_hash GlobalHeaderHash = 23456; /// IgnoreOp is a function that returns true if the operand should be ignored. IgnoreOperandFunc IgnoreOp = nullptr; /// A mapping from instruction indices to instruction pointers. /// The index represents the position of an instruction based on the order in /// which it is first encountered. std::unique_ptr IndexInstruction = nullptr; /// A mapping from pairs of instruction indices and operand indices /// to the hashes of the operands. std::unique_ptr IndexOperandHashMap = nullptr; /// Assign a unique ID to each Value in the order they are first seen. DenseMap ValueToId; static stable_hash hashType(Type *ValueType) { SmallVector Hashes; Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) Hashes.emplace_back(ValueType->getIntegerBitWidth()); return stable_hash_combine(Hashes); } public: StructuralHashImpl() = delete; explicit StructuralHashImpl(bool DetailedHash, IgnoreOperandFunc IgnoreOp = nullptr) : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { if (IgnoreOp) { IndexInstruction = std::make_unique(); IndexOperandHashMap = std::make_unique(); } } static stable_hash hashAPInt(const APInt &I) { SmallVector Hashes; Hashes.emplace_back(I.getBitWidth()); auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); Hashes.append(RawVals.begin(), RawVals.end()); return stable_hash_combine(Hashes); } static stable_hash hashAPFloat(const APFloat &F) { return hashAPInt(F.bitcastToAPInt()); } static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { if (!GVar.hasInitializer()) return hashGlobalValue(&GVar); // Hash the contents of a string. if (GVar.getName().starts_with(".str")) { auto *C = GVar.getInitializer(); if (const auto *Seq = dyn_cast(C)) if (Seq->isString()) return stable_hash_name(Seq->getAsString()); } // Hash structural contents of Objective-C metadata in specific sections. // This can be extended to other metadata if needed. static constexpr const char *SectionNames[] = { "__cfstring", "__cstring", "__objc_classrefs", "__objc_methname", "__objc_selrefs", }; if (GVar.hasSection()) { StringRef SectionName = GVar.getSection(); for (const char *Name : SectionNames) if (SectionName.contains(Name)) return hashConstant(GVar.getInitializer()); } return hashGlobalValue(&GVar); } static stable_hash hashGlobalValue(const GlobalValue *GV) { if (!GV->hasName()) return 0; return stable_hash_name(GV->getName()); } // Compute a hash for a Constant. This function is logically similar to // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here // we're interested in computing a hash rather than comparing two Constants. // Some of the logic is simplified, e.g, we don't expand GEPOperator. static stable_hash hashConstant(const Constant *C) { SmallVector Hashes; Type *Ty = C->getType(); Hashes.emplace_back(hashType(Ty)); if (C->isNullValue()) { Hashes.emplace_back(static_cast('N')); return stable_hash_combine(Hashes); } if (auto *GVar = dyn_cast(C)) { Hashes.emplace_back(hashGlobalVariable(*GVar)); return stable_hash_combine(Hashes); } if (auto *G = dyn_cast(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } if (const auto *Seq = dyn_cast(C)) { if (Seq->isString()) { Hashes.emplace_back(stable_hash_name(Seq->getAsString())); return stable_hash_combine(Hashes); } } switch (C->getValueID()) { case Value::ConstantIntVal: { const APInt &Int = cast(C)->getValue(); Hashes.emplace_back(hashAPInt(Int)); return stable_hash_combine(Hashes); } case Value::ConstantFPVal: { const APFloat &APF = cast(C)->getValueAPF(); Hashes.emplace_back(hashAPFloat(APF)); return stable_hash_combine(Hashes); } case Value::ConstantArrayVal: case Value::ConstantStructVal: case Value::ConstantVectorVal: case Value::ConstantExprVal: { for (const auto &Op : C->operands()) { auto H = hashConstant(cast(Op)); Hashes.emplace_back(H); } return stable_hash_combine(Hashes); } case Value::BlockAddressVal: { const BlockAddress *BA = cast(C); auto H = hashGlobalValue(BA->getFunction()); Hashes.emplace_back(H); return stable_hash_combine(Hashes); } case Value::DSOLocalEquivalentVal: { const auto *Equiv = cast(C); auto H = hashGlobalValue(Equiv->getGlobalValue()); Hashes.emplace_back(H); return stable_hash_combine(Hashes); } default: // Skip other types of constants for simplicity. return stable_hash_combine(Hashes); } } stable_hash hashValue(Value *V) { // Check constant and return its hash. Constant *C = dyn_cast(V); if (C) return hashConstant(C); // Hash argument number. SmallVector Hashes; if (Argument *Arg = dyn_cast(V)) Hashes.emplace_back(Arg->getArgNo()); // Get an index (an insertion order) for the non-constant value. auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); Hashes.emplace_back(It->second); return stable_hash_combine(Hashes); } stable_hash hashOperand(Value *Operand) { SmallVector Hashes; Hashes.emplace_back(hashType(Operand->getType())); Hashes.emplace_back(hashValue(Operand)); return stable_hash_combine(Hashes); } stable_hash hashInstruction(const Instruction &Inst) { SmallVector Hashes; Hashes.emplace_back(Inst.getOpcode()); if (!DetailedHash) return stable_hash_combine(Hashes); Hashes.emplace_back(hashType(Inst.getType())); // Handle additional properties of specific instructions that cause // semantic differences in the IR. if (const auto *ComparisonInstruction = dyn_cast(&Inst)) Hashes.emplace_back(ComparisonInstruction->getPredicate()); unsigned InstIdx = 0; if (IndexInstruction) { InstIdx = IndexInstruction->size(); IndexInstruction->try_emplace(InstIdx, const_cast(&Inst)); } for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { auto OpndHash = hashOperand(Op); if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { assert(IndexOperandHashMap); IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); } else Hashes.emplace_back(OpndHash); } return stable_hash_combine(Hashes); } // A function hash is calculated by considering only the number of arguments // and whether a function is varargs, the order of basic blocks (given by the // successors of each basic block in depth first order), and the order of // opcodes of each instruction within each of these basic blocks. This mirrors // the strategy FunctionComparator::compare() uses to compare functions by // walking the BBs in depth first order and comparing each instruction in // sequence. Because this hash currently does not look at the operands, it is // insensitive to things such as the target of calls and the constants used in // the function, which makes it useful when possibly merging functions which // are the same modulo constants and call targets. // // Note that different users of StructuralHash will want different behavior // out of it (i.e., MergeFunctions will want something different from PM // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. void update(const Function &F) { // Declarations don't affect analyses. if (F.isDeclaration()) return; SmallVector Hashes; Hashes.emplace_back(Hash); Hashes.emplace_back(FunctionHeaderHash); Hashes.emplace_back(F.isVarArg()); Hashes.emplace_back(F.arg_size()); SmallVector BBs; SmallPtrSet VisitedBBs; // Walk the blocks in the same order as // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the // function "structure." (BB and opcode sequence) BBs.push_back(&F.getEntryBlock()); VisitedBBs.insert(BBs[0]); while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); Hashes.emplace_back(BlockHeaderHash); for (auto &Inst : *BB) Hashes.emplace_back(hashInstruction(Inst)); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } // Update the combined hash in place. Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { // Declarations and used/compiler.used don't affect analyses. // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; SmallVector Hashes; Hashes.emplace_back(Hash); Hashes.emplace_back(GlobalHeaderHash); Hashes.emplace_back(GV.getValueType()->getTypeID()); // Update the combined hash in place. Hash = stable_hash_combine(Hashes); } void update(const Module &M) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) update(F); } uint64_t getHash() const { return Hash; } std::unique_ptr getIndexInstrMap() { return std::move(IndexInstruction); } std::unique_ptr getIndexPairOpndHashMap() { return std::move(IndexOperandHashMap); } }; } // namespace stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { StructuralHashImpl H(DetailedHash); H.update(F); return H.getHash(); } stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { return StructuralHashImpl::hashGlobalVariable(GVar); } stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H(DetailedHash); H.update(M); return H.getHash(); } FunctionHashInfo llvm::StructuralHashWithDifferences(const Function &F, IgnoreOperandFunc IgnoreOp) { StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); H.update(F); return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), H.getIndexPairOpndHashMap()); }