//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements a CFL-based context-insensitive alias analysis // algorithm. It does not depend on types. The algorithm is a mixture of the one // described in "Demand-driven alias analysis for C" by Xin Zheng and Radu // Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to // Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the // papers, we build a graph of the uses of a variable, where each node is a // memory location, and each edge is an action that happened on that memory // location. The "actions" can be one of Dereference, Reference, or Assign. // // Two variables are considered as aliasing iff you can reach one value's node // from the other value's node and the language formed by concatenating all of // the edge labels (actions) conforms to a context-free grammar. // // Because this algorithm requires a graph search on each query, we execute the // algorithm outlined in "Fast algorithms..." (mentioned above) // in order to transform the graph into sets of variables that may alias in // ~nlogn time (n = number of variables), which makes queries take constant // time. //===----------------------------------------------------------------------===// // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and // CFLAA is interprocedural. This is *technically* A Bad Thing, because // FunctionPasses are only allowed to inspect the Function that they're being // run on. Realistically, this likely isn't a problem until we allow // FunctionPasses to run concurrently. #include "llvm/Analysis/CFLAliasAnalysis.h" #include "StratifiedSets.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include using namespace llvm; #define DEBUG_TYPE "cfl-aa" CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(), TLI(TLI) {} CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} CFLAAResult::~CFLAAResult() {} /// We use InterfaceValue to describe parameters/return value, as well as /// potential memory locations that are pointed to by parameters/return value, /// of a function. /// Index is an integer which represents a single parameter or a return value. /// When the index is 0, it refers to the return value. Non-zero index i refers /// to the i-th parameter. /// DerefLevel indicates the number of dereferences one must perform on the /// parameter/return value to get this InterfaceValue. struct InterfaceValue { unsigned Index; unsigned DerefLevel; }; bool operator==(InterfaceValue lhs, InterfaceValue rhs) { return lhs.Index == rhs.Index && lhs.DerefLevel == rhs.DerefLevel; } bool operator!=(InterfaceValue lhs, InterfaceValue rhs) { return !(lhs == rhs); } /// We use ExternalRelation to describe an externally visible aliasing relations /// between parameters/return value of a function. struct ExternalRelation { InterfaceValue From, To; }; /// We use ExternalAttribute to describe an externally visible StratifiedAttrs /// for parameters/return value. struct ExternalAttribute { InterfaceValue IValue; StratifiedAttrs Attr; }; /// Information we have about a function and would like to keep around. class CFLAAResult::FunctionInfo { StratifiedSets Sets; // RetParamRelations is a collection of ExternalRelations. SmallVector RetParamRelations; // RetParamAttributes is a collection of ExternalAttributes. SmallVector RetParamAttributes; public: FunctionInfo(Function &Fn, const SmallVectorImpl &RetVals, StratifiedSets S); const StratifiedSets &getStratifiedSets() const { return Sets; } const SmallVectorImpl &getRetParamRelations() const { return RetParamRelations; } const SmallVectorImpl &getRetParamAttributes() const { return RetParamAttributes; } }; /// Try to go from a Value* to a Function*. Never returns nullptr. static Optional parentFunctionOfValue(Value *); /// Returns possible functions called by the Inst* into the given /// SmallVectorImpl. Returns true if targets found, false otherwise. This is /// templated so we can use it with CallInsts and InvokeInsts. static bool getPossibleTargets(CallSite, SmallVectorImpl &); const StratifiedIndex StratifiedLink::SetSentinel = std::numeric_limits::max(); namespace { /// StratifiedInfo Attribute things. LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs; LLVM_CONSTEXPR unsigned AttrEscapedIndex = 0; LLVM_CONSTEXPR unsigned AttrUnknownIndex = 1; LLVM_CONSTEXPR unsigned AttrGlobalIndex = 2; LLVM_CONSTEXPR unsigned AttrCallerIndex = 3; LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 4; LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex; LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; // NOTE: These aren't StratifiedAttrs because bitsets don't have a constexpr // ctor for some versions of MSVC that we support. We could maybe refactor, // but... using StratifiedAttr = unsigned; LLVM_CONSTEXPR StratifiedAttr AttrNone = 0; LLVM_CONSTEXPR StratifiedAttr AttrEscaped = 1 << AttrEscapedIndex; LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex; LLVM_CONSTEXPR StratifiedAttr AttrGlobal = 1 << AttrGlobalIndex; LLVM_CONSTEXPR StratifiedAttr AttrCaller = 1 << AttrCallerIndex; LLVM_CONSTEXPR StratifiedAttr ExternalAttrMask = AttrEscaped | AttrUnknown | AttrGlobal; /// The maximum number of arguments we can put into a summary. LLVM_CONSTEXPR unsigned MaxSupportedArgsInSummary = 50; /// StratifiedSets call for knowledge of "direction", so this is how we /// represent that locally. enum class Level { Same, Above, Below }; /// Edges can be one of four "weights" -- each weight must have an inverse /// weight (Assign has Assign; Reference has Dereference). enum class EdgeType { /// The weight assigned when assigning from or to a value. For example, in: /// %b = getelementptr %a, 0 /// ...The relationships are %b assign %a, and %a assign %b. This used to be /// two edges, but having a distinction bought us nothing. Assign, /// The edge used when we have an edge going from some handle to a Value. /// Examples of this include: /// %b = load %a (%b Dereference %a) /// %b = extractelement %a, 0 (%a Dereference %b) Dereference, /// The edge used when our edge goes from a value to a handle that may have /// contained it at some point. Examples: /// %b = load %a (%a Reference %b) /// %b = extractelement %a, 0 (%b Reference %a) Reference }; /// The Program Expression Graph (PEG) of CFL analysis class CFLGraph { typedef Value *Node; struct Edge { EdgeType Type; Node Other; }; typedef std::vector EdgeList; struct NodeInfo { EdgeList Edges; StratifiedAttrs Attr; }; typedef DenseMap NodeMap; NodeMap NodeImpls; // Gets the inverse of a given EdgeType. static EdgeType flipWeight(EdgeType Initial) { switch (Initial) { case EdgeType::Assign: return EdgeType::Assign; case EdgeType::Dereference: return EdgeType::Reference; case EdgeType::Reference: return EdgeType::Dereference; } llvm_unreachable("Incomplete coverage of EdgeType enum"); } const NodeInfo *getNode(Node N) const { auto Itr = NodeImpls.find(N); if (Itr == NodeImpls.end()) return nullptr; return &Itr->second; } NodeInfo *getNode(Node N) { auto Itr = NodeImpls.find(N); if (Itr == NodeImpls.end()) return nullptr; return &Itr->second; } static Node nodeDeref(const NodeMap::value_type &P) { return P.first; } typedef std::pointer_to_unary_function NodeDerefFun; public: typedef EdgeList::const_iterator const_edge_iterator; typedef mapped_iterator const_node_iterator; bool addNode(Node N) { return NodeImpls.insert(std::make_pair(N, NodeInfo{EdgeList(), AttrNone})) .second; } void addAttr(Node N, StratifiedAttrs Attr) { auto *Info = getNode(N); assert(Info != nullptr); Info->Attr |= Attr; } void addEdge(Node From, Node To, EdgeType Type) { auto *FromInfo = getNode(From); assert(FromInfo != nullptr); auto *ToInfo = getNode(To); assert(ToInfo != nullptr); FromInfo->Edges.push_back(Edge{Type, To}); ToInfo->Edges.push_back(Edge{flipWeight(Type), From}); } StratifiedAttrs attrFor(Node N) const { auto *Info = getNode(N); assert(Info != nullptr); return Info->Attr; } iterator_range edgesFor(Node N) const { auto *Info = getNode(N); assert(Info != nullptr); auto &Edges = Info->Edges; return make_range(Edges.begin(), Edges.end()); } iterator_range nodes() const { return make_range( map_iterator(NodeImpls.begin(), NodeDerefFun(nodeDeref)), map_iterator(NodeImpls.end(), NodeDerefFun(nodeDeref))); } bool empty() const { return NodeImpls.empty(); } std::size_t size() const { return NodeImpls.size(); } }; // This is the result of instantiating InterfaceValue at a particular callsite struct InterprocNode { Value *Val; unsigned DerefLevel; }; // Interprocedural assignment edges that CFLGraph may not easily model struct InterprocEdge { InterprocNode From, To; }; // Interprocedural attribute tagging that CFLGraph may not easily model struct InterprocAttr { InterprocNode Node; StratifiedAttrs Attr; }; /// Gets the edges our graph should have, based on an Instruction* class GetEdgesVisitor : public InstVisitor { CFLAAResult &AA; const TargetLibraryInfo &TLI; CFLGraph &Graph; SmallVectorImpl &ReturnValues; SmallPtrSetImpl &Externals; SmallPtrSetImpl &Escapes; SmallVectorImpl &InterprocEdges; SmallVectorImpl &InterprocAttrs; static bool hasUsefulEdges(ConstantExpr *CE) { // ConstantExpr doesn't have terminators, invokes, or fences, so only needs // to check for compares. return CE->getOpcode() != Instruction::ICmp && CE->getOpcode() != Instruction::FCmp; } void addNode(Value *Val) { if (!Graph.addNode(Val)) return; if (isa(Val)) Externals.insert(Val); else if (auto CExpr = dyn_cast(Val)) if (hasUsefulEdges(CExpr)) visitConstantExpr(CExpr); } void addNodeWithAttr(Value *Val, StratifiedAttrs Attr) { addNode(Val); Graph.addAttr(Val, Attr); } void addEdge(Value *From, Value *To, EdgeType Type) { if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) return; addNode(From); if (To != From) addNode(To); Graph.addEdge(From, To, Type); } public: GetEdgesVisitor(CFLAAResult &AA, const TargetLibraryInfo &TLI, CFLGraph &Graph, SmallVectorImpl &ReturnValues, SmallPtrSetImpl &Externals, SmallPtrSetImpl &Escapes, SmallVectorImpl &InterprocEdges, SmallVectorImpl &InterprocAttrs) : AA(AA), TLI(TLI), Graph(Graph), ReturnValues(ReturnValues), Externals(Externals), Escapes(Escapes), InterprocEdges(InterprocEdges), InterprocAttrs(InterprocAttrs) {} void visitInstruction(Instruction &) { llvm_unreachable("Unsupported instruction encountered"); } void visitReturnInst(ReturnInst &Inst) { if (auto RetVal = Inst.getReturnValue()) { if (RetVal->getType()->isPointerTy()) { addNode(RetVal); ReturnValues.push_back(RetVal); } } } void visitPtrToIntInst(PtrToIntInst &Inst) { auto *Ptr = Inst.getOperand(0); addNodeWithAttr(Ptr, AttrEscaped); } void visitIntToPtrInst(IntToPtrInst &Inst) { auto *Ptr = &Inst; addNodeWithAttr(Ptr, AttrUnknown); } void visitCastInst(CastInst &Inst) { auto *Src = Inst.getOperand(0); addEdge(Src, &Inst, EdgeType::Assign); } void visitBinaryOperator(BinaryOperator &Inst) { auto *Op1 = Inst.getOperand(0); auto *Op2 = Inst.getOperand(1); addEdge(Op1, &Inst, EdgeType::Assign); addEdge(Op2, &Inst, EdgeType::Assign); } void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getNewValOperand(); addEdge(Ptr, Val, EdgeType::Dereference); } void visitAtomicRMWInst(AtomicRMWInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getValOperand(); addEdge(Ptr, Val, EdgeType::Dereference); } void visitPHINode(PHINode &Inst) { for (Value *Val : Inst.incoming_values()) addEdge(Val, &Inst, EdgeType::Assign); } void visitGetElementPtrInst(GetElementPtrInst &Inst) { auto *Op = Inst.getPointerOperand(); addEdge(Op, &Inst, EdgeType::Assign); } void visitSelectInst(SelectInst &Inst) { // Condition is not processed here (The actual statement producing // the condition result is processed elsewhere). For select, the // condition is evaluated, but not loaded, stored, or assigned // simply as a result of being the condition of a select. auto *TrueVal = Inst.getTrueValue(); auto *FalseVal = Inst.getFalseValue(); addEdge(TrueVal, &Inst, EdgeType::Assign); addEdge(FalseVal, &Inst, EdgeType::Assign); } void visitAllocaInst(AllocaInst &Inst) { Graph.addNode(&Inst); } void visitLoadInst(LoadInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = &Inst; addEdge(Val, Ptr, EdgeType::Reference); } void visitStoreInst(StoreInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getValueOperand(); addEdge(Ptr, Val, EdgeType::Dereference); } void visitVAArgInst(VAArgInst &Inst) { // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it does // two things: // 1. Loads a value from *((T*)*Ptr). // 2. Increments (stores to) *Ptr by some target-specific amount. // For now, we'll handle this like a landingpad instruction (by placing the // result in its own group, and having that group alias externals). addNodeWithAttr(&Inst, AttrUnknown); } static bool isFunctionExternal(Function *Fn) { return !Fn->hasExactDefinition(); } bool tryInterproceduralAnalysis(CallSite CS, const SmallVectorImpl &Fns) { assert(Fns.size() > 0); if (CS.arg_size() > MaxSupportedArgsInSummary) return false; // Exit early if we'll fail anyway for (auto *Fn : Fns) { if (isFunctionExternal(Fn) || Fn->isVarArg()) return false; // Fail if the caller does not provide enough arguments assert(Fn->arg_size() <= CS.arg_size()); auto &MaybeInfo = AA.ensureCached(Fn); if (!MaybeInfo.hasValue()) return false; } auto InstantiateInterfaceIndex = [&CS](unsigned Index) { auto Value = (Index == 0) ? CS.getInstruction() : CS.getArgument(Index - 1); return Value->getType()->isPointerTy() ? Value : nullptr; }; for (auto *Fn : Fns) { auto &FnInfo = AA.ensureCached(Fn); assert(FnInfo.hasValue()); auto &RetParamRelations = FnInfo->getRetParamRelations(); for (auto &Relation : RetParamRelations) { auto FromVal = InstantiateInterfaceIndex(Relation.From.Index); auto ToVal = InstantiateInterfaceIndex(Relation.To.Index); if (FromVal && ToVal) { auto FromLevel = Relation.From.DerefLevel; auto ToLevel = Relation.To.DerefLevel; InterprocEdges.push_back( InterprocEdge{InterprocNode{FromVal, FromLevel}, InterprocNode{ToVal, ToLevel}}); } } auto &RetParamAttributes = FnInfo->getRetParamAttributes(); for (auto &Attribute : RetParamAttributes) { if (auto Val = InstantiateInterfaceIndex(Attribute.IValue.Index)) { InterprocAttrs.push_back(InterprocAttr{ InterprocNode{Val, Attribute.IValue.DerefLevel}, Attribute.Attr}); } } } return true; } void visitCallSite(CallSite CS) { auto Inst = CS.getInstruction(); // Make sure all arguments and return value are added to the graph first for (Value *V : CS.args()) addNode(V); if (Inst->getType()->isPointerTy()) addNode(Inst); // Check if Inst is a call to a library function that allocates/deallocates // on the heap. Those kinds of functions do not introduce any aliases. // TODO: address other common library functions such as realloc(), strdup(), // etc. if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI)) return; // TODO: Add support for noalias args/all the other fun function attributes // that we can tack on. SmallVector Targets; if (getPossibleTargets(CS, Targets)) if (tryInterproceduralAnalysis(CS, Targets)) return; // Because the function is opaque, we need to note that anything // could have happened to the arguments (unless the function is marked // readonly or readnone), and that the result could alias just about // anything, too (unless the result is marked noalias). if (!CS.onlyReadsMemory()) for (Value *V : CS.args()) { if (V->getType()->isPointerTy()) Escapes.insert(V); } if (Inst->getType()->isPointerTy()) { auto *Fn = CS.getCalledFunction(); if (Fn == nullptr || !Fn->doesNotAlias(0)) Graph.addAttr(Inst, AttrUnknown); } } /// Because vectors/aggregates are immutable and unaddressable, there's /// nothing we can do to coax a value out of them, other than calling /// Extract{Element,Value}. We can effectively treat them as pointers to /// arbitrary memory locations we can store in and load from. void visitExtractElementInst(ExtractElementInst &Inst) { auto *Ptr = Inst.getVectorOperand(); auto *Val = &Inst; addEdge(Val, Ptr, EdgeType::Reference); } void visitInsertElementInst(InsertElementInst &Inst) { auto *Vec = Inst.getOperand(0); auto *Val = Inst.getOperand(1); addEdge(Vec, &Inst, EdgeType::Assign); addEdge(&Inst, Val, EdgeType::Dereference); } void visitLandingPadInst(LandingPadInst &Inst) { // Exceptions come from "nowhere", from our analysis' perspective. // So we place the instruction its own group, noting that said group may // alias externals addNodeWithAttr(&Inst, AttrUnknown); } void visitInsertValueInst(InsertValueInst &Inst) { auto *Agg = Inst.getOperand(0); auto *Val = Inst.getOperand(1); addEdge(Agg, &Inst, EdgeType::Assign); addEdge(&Inst, Val, EdgeType::Dereference); } void visitExtractValueInst(ExtractValueInst &Inst) { auto *Ptr = Inst.getAggregateOperand(); addEdge(&Inst, Ptr, EdgeType::Reference); } void visitShuffleVectorInst(ShuffleVectorInst &Inst) { auto *From1 = Inst.getOperand(0); auto *From2 = Inst.getOperand(1); addEdge(From1, &Inst, EdgeType::Assign); addEdge(From2, &Inst, EdgeType::Assign); } void visitConstantExpr(ConstantExpr *CE) { switch (CE->getOpcode()) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: \ visit##OPCODE(*(CLASS *)CE); \ break; #include "llvm/IR/Instruction.def" } } }; class CFLGraphBuilder { // Input of the builder CFLAAResult &Analysis; const TargetLibraryInfo &TLI; // Output of the builder CFLGraph Graph; SmallVector ReturnedValues; // Auxiliary structures used by the builder SmallPtrSet ExternalValues; SmallPtrSet EscapedValues; SmallVector InterprocEdges; SmallVector InterprocAttrs; // Helper functions // Determines whether or not we an instruction is useless to us (e.g. // FenceInst) static bool hasUsefulEdges(Instruction *Inst) { bool IsNonInvokeRetTerminator = isa(Inst) && !isa(Inst) && !isa(Inst); return !isa(Inst) && !isa(Inst) && !IsNonInvokeRetTerminator; } void addArgumentToGraph(Argument &Arg) { if (Arg.getType()->isPointerTy()) { Graph.addNode(&Arg); ExternalValues.insert(&Arg); } } // Given an Instruction, this will add it to the graph, along with any // Instructions that are potentially only available from said Instruction // For example, given the following line: // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 // addInstructionToGraph would add both the `load` and `getelementptr` // instructions to the graph appropriately. void addInstructionToGraph(Instruction &Inst) { if (!hasUsefulEdges(&Inst)) return; GetEdgesVisitor(Analysis, TLI, Graph, ReturnedValues, ExternalValues, EscapedValues, InterprocEdges, InterprocAttrs) .visit(Inst); } // Builds the graph needed for constructing the StratifiedSets for the given // function void buildGraphFrom(Function &Fn) { for (auto &Bb : Fn.getBasicBlockList()) for (auto &Inst : Bb.getInstList()) addInstructionToGraph(Inst); for (auto &Arg : Fn.args()) addArgumentToGraph(Arg); } public: CFLGraphBuilder(CFLAAResult &Analysis, const TargetLibraryInfo &TLI, Function &Fn) : Analysis(Analysis), TLI(TLI) { buildGraphFrom(Fn); } const CFLGraph &getCFLGraph() const { return Graph; } const SmallVector &getReturnValues() const { return ReturnedValues; } const SmallPtrSet &getExternalValues() const { return ExternalValues; } const SmallPtrSet &getEscapedValues() const { return EscapedValues; } const SmallVector &getInterprocEdges() const { return InterprocEdges; } const SmallVector &getInterprocAttrs() const { return InterprocAttrs; } }; } //===----------------------------------------------------------------------===// // Function declarations that require types defined in the namespace above //===----------------------------------------------------------------------===// /// Given a StratifiedAttrs, returns true if it marks the corresponding values /// as globals or arguments static bool isGlobalOrArgAttr(StratifiedAttrs Attr); /// Given a StratifiedAttrs, returns true if the corresponding values come from /// an unknown source (such as opaque memory or an integer cast) static bool isUnknownAttr(StratifiedAttrs Attr); /// Given an argument number, returns the appropriate StratifiedAttr to set. static StratifiedAttrs argNumberToAttr(unsigned ArgNum); /// Given a Value, potentially return which StratifiedAttr it maps to. static Optional valueToAttr(Value *Val); /// Gets the "Level" that one should travel in StratifiedSets /// given an EdgeType. static Level directionOfEdgeType(EdgeType); /// Determines whether it would be pointless to add the given Value to our sets. static bool canSkipAddingToSets(Value *Val); static Optional parentFunctionOfValue(Value *Val) { if (auto *Inst = dyn_cast(Val)) { auto *Bb = Inst->getParent(); return Bb->getParent(); } if (auto *Arg = dyn_cast(Val)) return Arg->getParent(); return None; } static bool getPossibleTargets(CallSite CS, SmallVectorImpl &Output) { if (auto *Fn = CS.getCalledFunction()) { Output.push_back(Fn); return true; } // TODO: If the call is indirect, we might be able to enumerate all potential // targets of the call and return them, rather than just failing. return false; } static bool isGlobalOrArgAttr(StratifiedAttrs Attr) { return Attr.reset(AttrEscapedIndex) .reset(AttrUnknownIndex) .reset(AttrCallerIndex) .any(); } static bool isUnknownAttr(StratifiedAttrs Attr) { return Attr.test(AttrUnknownIndex) || Attr.test(AttrCallerIndex); } static Optional valueToAttr(Value *Val) { if (isa(Val)) return StratifiedAttrs(AttrGlobal); if (auto *Arg = dyn_cast(Val)) // Only pointer arguments should have the argument attribute, // because things can't escape through scalars without us seeing a // cast, and thus, interaction with them doesn't matter. if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy()) return argNumberToAttr(Arg->getArgNo()); return None; } static StratifiedAttrs argNumberToAttr(unsigned ArgNum) { if (ArgNum >= AttrMaxNumArgs) return AttrUnknown; // N.B. MSVC complains if we use `1U` here, since StratifiedAttrs' ctor takes // an unsigned long long. return StratifiedAttrs(1ULL << (ArgNum + AttrFirstArgIndex)); } static Level directionOfEdgeType(EdgeType Weight) { switch (Weight) { case EdgeType::Reference: return Level::Above; case EdgeType::Dereference: return Level::Below; case EdgeType::Assign: return Level::Same; } llvm_unreachable("Incomplete switch coverage"); } static bool canSkipAddingToSets(Value *Val) { // Constants can share instances, which may falsely unify multiple // sets, e.g. in // store i32* null, i32** %ptr1 // store i32* null, i32** %ptr2 // clearly ptr1 and ptr2 should not be unified into the same set, so // we should filter out the (potentially shared) instance to // i32* null. if (isa(Val)) { // TODO: Because all of these things are constant, we can determine whether // the data is *actually* mutable at graph building time. This will probably // come for free/cheap with offset awareness. bool CanStoreMutableData = isa(Val) || isa(Val) || isa(Val); return !CanStoreMutableData; } return false; } CFLAAResult::FunctionInfo::FunctionInfo(Function &Fn, const SmallVectorImpl &RetVals, StratifiedSets S) : Sets(std::move(S)) { // Historically, an arbitrary upper-bound of 50 args was selected. We may want // to remove this if it doesn't really matter in practice. if (Fn.arg_size() > MaxSupportedArgsInSummary) return; DenseMap InterfaceMap; // Our intention here is to record all InterfaceValues that share the same // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we // have its StratifiedIndex scanned here and check if the index is presented // in InterfaceMap: if it is not, we add the correspondence to the map; // otherwise, an aliasing relation is found and we add it to // RetParamRelations. auto AddToRetParamRelations = [&](unsigned InterfaceIndex, StratifiedIndex SetIndex) { unsigned Level = 0; while (true) { InterfaceValue CurrValue{InterfaceIndex, Level}; auto Itr = InterfaceMap.find(SetIndex); if (Itr != InterfaceMap.end()) { if (CurrValue != Itr->second) RetParamRelations.push_back(ExternalRelation{CurrValue, Itr->second}); break; } auto &Link = Sets.getLink(SetIndex); InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); auto ExternalAttrs = Link.Attrs & StratifiedAttrs(ExternalAttrMask); if (ExternalAttrs.any()) RetParamAttributes.push_back( ExternalAttribute{CurrValue, ExternalAttrs}); if (!Link.hasBelow()) break; ++Level; SetIndex = Link.Below; } }; // Populate RetParamRelations for return values for (auto *RetVal : RetVals) { assert(RetVal != nullptr); assert(RetVal->getType()->isPointerTy()); auto RetInfo = Sets.find(RetVal); if (RetInfo.hasValue()) AddToRetParamRelations(0, RetInfo->Index); } // Populate RetParamRelations for parameters unsigned I = 0; for (auto &Param : Fn.args()) { if (Param.getType()->isPointerTy()) { auto ParamInfo = Sets.find(&Param); if (ParamInfo.hasValue()) AddToRetParamRelations(I + 1, ParamInfo->Index); } ++I; } } // Builds the graph + StratifiedSets for a function. CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) { CFLGraphBuilder GraphBuilder(*this, TLI, *Fn); StratifiedSetsBuilder SetBuilder; auto &Graph = GraphBuilder.getCFLGraph(); SmallVector Worklist; for (auto Node : Graph.nodes()) Worklist.push_back(Node); while (!Worklist.empty()) { auto *CurValue = Worklist.pop_back_val(); SetBuilder.add(CurValue); if (canSkipAddingToSets(CurValue)) continue; auto Attr = Graph.attrFor(CurValue); SetBuilder.noteAttributes(CurValue, Attr); for (const auto &Edge : Graph.edgesFor(CurValue)) { auto Label = Edge.Type; auto *OtherValue = Edge.Other; if (canSkipAddingToSets(OtherValue)) continue; bool Added; switch (directionOfEdgeType(Label)) { case Level::Above: Added = SetBuilder.addAbove(CurValue, OtherValue); break; case Level::Below: Added = SetBuilder.addBelow(CurValue, OtherValue); break; case Level::Same: Added = SetBuilder.addWith(CurValue, OtherValue); break; } if (Added) Worklist.push_back(OtherValue); } } // Special handling for globals and arguments for (auto *External : GraphBuilder.getExternalValues()) { SetBuilder.add(External); auto Attr = valueToAttr(External); if (Attr.hasValue()) { SetBuilder.noteAttributes(External, *Attr); if (*Attr == AttrGlobal) SetBuilder.addAttributesBelow(External, 1, AttrUnknown); else SetBuilder.addAttributesBelow(External, 1, AttrCaller); } } // Special handling for interprocedural aliases for (auto &Edge : GraphBuilder.getInterprocEdges()) { auto FromVal = Edge.From.Val; auto ToVal = Edge.To.Val; SetBuilder.add(FromVal); SetBuilder.add(ToVal); SetBuilder.addBelowWith(FromVal, Edge.From.DerefLevel, ToVal, Edge.To.DerefLevel); } // Special handling for interprocedural attributes for (auto &IPAttr : GraphBuilder.getInterprocAttrs()) { auto Val = IPAttr.Node.Val; SetBuilder.add(Val); SetBuilder.addAttributesBelow(Val, IPAttr.Node.DerefLevel, IPAttr.Attr); } // Special handling for opaque external functions for (auto *Escape : GraphBuilder.getEscapedValues()) { SetBuilder.add(Escape); SetBuilder.noteAttributes(Escape, AttrEscaped); SetBuilder.addAttributesBelow(Escape, 1, AttrUnknown); } return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); } void CFLAAResult::scan(Function *Fn) { auto InsertPair = Cache.insert(std::make_pair(Fn, Optional())); (void)InsertPair; assert(InsertPair.second && "Trying to scan a function that has already been cached"); // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call // may get evaluated after operator[], potentially triggering a DenseMap // resize and invalidating the reference returned by operator[] auto FunInfo = buildSetsFrom(Fn); Cache[Fn] = std::move(FunInfo); Handles.push_front(FunctionHandle(Fn, this)); } void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); } /// Ensures that the given function is available in the cache, and returns the /// entry. const Optional & CFLAAResult::ensureCached(Function *Fn) { auto Iter = Cache.find(Fn); if (Iter == Cache.end()) { scan(Fn); Iter = Cache.find(Fn); assert(Iter != Cache.end()); assert(Iter->second.hasValue()); } return Iter->second; } AliasResult CFLAAResult::query(const MemoryLocation &LocA, const MemoryLocation &LocB) { auto *ValA = const_cast(LocA.Ptr); auto *ValB = const_cast(LocB.Ptr); if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) return NoAlias; Function *Fn = nullptr; auto MaybeFnA = parentFunctionOfValue(ValA); auto MaybeFnB = parentFunctionOfValue(ValB); if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { // The only times this is known to happen are when globals + InlineAsm are // involved DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n"); return MayAlias; } if (MaybeFnA.hasValue()) { Fn = *MaybeFnA; assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && "Interprocedural queries not supported"); } else { Fn = *MaybeFnB; } assert(Fn != nullptr); auto &MaybeInfo = ensureCached(Fn); assert(MaybeInfo.hasValue()); auto &Sets = MaybeInfo->getStratifiedSets(); auto MaybeA = Sets.find(ValA); if (!MaybeA.hasValue()) return MayAlias; auto MaybeB = Sets.find(ValB); if (!MaybeB.hasValue()) return MayAlias; auto SetA = *MaybeA; auto SetB = *MaybeB; auto AttrsA = Sets.getLink(SetA.Index).Attrs; auto AttrsB = Sets.getLink(SetB.Index).Attrs; // If both values are local (meaning the corresponding set has attribute // AttrNone or AttrEscaped), then we know that CFLAA fully models them: they // may-alias each other if and only if they are in the same set // If at least one value is non-local (meaning it either is global/argument or // it comes from unknown sources like integer cast), the situation becomes a // bit more interesting. We follow three general rules described below: // - Non-local values may alias each other // - AttrNone values do not alias any non-local values // - AttrEscaped do not alias globals/arguments, but they may alias // AttrUnknown values if (SetA.Index == SetB.Index) return MayAlias; if (AttrsA.none() || AttrsB.none()) return NoAlias; if (isUnknownAttr(AttrsA) || isUnknownAttr(AttrsB)) return MayAlias; if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) return MayAlias; return NoAlias; } ModRefInfo CFLAAResult::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { if (auto CalledFunc = CS.getCalledFunction()) { auto &MaybeInfo = ensureCached(const_cast(CalledFunc)); if (!MaybeInfo.hasValue()) return MRI_ModRef; auto &RetParamAttributes = MaybeInfo->getRetParamAttributes(); auto &RetParamRelations = MaybeInfo->getRetParamRelations(); bool ArgAttributeIsWritten = std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(), [ArgIdx](const ExternalAttribute &ExtAttr) { return ExtAttr.IValue.Index == ArgIdx + 1; }); bool ArgIsAccessed = std::any_of(RetParamRelations.begin(), RetParamRelations.end(), [ArgIdx](const ExternalRelation &ExtRelation) { return ExtRelation.To.Index == ArgIdx + 1 || ExtRelation.From.Index == ArgIdx + 1; }); return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef : MRI_ModRef; } return MRI_ModRef; } FunctionModRefBehavior CFLAAResult::getModRefBehavior(ImmutableCallSite CS) { // If we know the callee, try analyzing it if (auto CalledFunc = CS.getCalledFunction()) return getModRefBehavior(CalledFunc); // Otherwise, be conservative return FMRB_UnknownModRefBehavior; } FunctionModRefBehavior CFLAAResult::getModRefBehavior(const Function *F) { assert(F != nullptr); // TODO: Remove the const_cast auto &MaybeInfo = ensureCached(const_cast(F)); if (!MaybeInfo.hasValue()) return FMRB_UnknownModRefBehavior; auto &RetParamAttributes = MaybeInfo->getRetParamAttributes(); auto &RetParamRelations = MaybeInfo->getRetParamRelations(); // First, if any argument is marked Escpaed, Unknown or Global, anything may // happen to them and thus we can't draw any conclusion. if (!RetParamAttributes.empty()) return FMRB_UnknownModRefBehavior; // Currently we don't (and can't) distinguish reads from writes in // RetParamRelations. All we can say is whether there may be memory access or // not. if (RetParamRelations.empty()) return FMRB_DoesNotAccessMemory; // Check if something beyond argmem gets touched. bool AccessArgMemoryOnly = std::all_of(RetParamRelations.begin(), RetParamRelations.end(), [](const ExternalRelation &ExtRelation) { // Both DerefLevels has to be 0, since we don't know which // one is a read and which is a write. return ExtRelation.From.DerefLevel == 0 && ExtRelation.To.DerefLevel == 0; }); return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees : FMRB_UnknownModRefBehavior; } char CFLAA::PassID; CFLAAResult CFLAA::run(Function &F, AnalysisManager &AM) { return CFLAAResult(AM.getResult(F)); } char CFLAAWrapperPass::ID = 0; INITIALIZE_PASS(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", false, true) ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); } CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) { initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry()); } void CFLAAWrapperPass::initializePass() { auto &TLIWP = getAnalysis(); Result.reset(new CFLAAResult(TLIWP.getTLI())); } void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); }