llvm-project/lld/ELF/LinkerScript.h

//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_LINKER_SCRIPT_H
#define LLD_ELF_LINKER_SCRIPT_H

#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>

namespace lld::elf {

class Defined;
class InputFile;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class ThunkSection;
struct OutputDesc;
struct SectionClass;
struct SectionClassDesc;

// This represents an r-value in the linker script.
struct ExprValue {
  ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
            const Twine &loc)
      : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}

  ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}

  bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
  uint64_t getValue() const;
  uint64_t getSecAddr() const;
  uint64_t getSectionOffset() const;

  // If a value is relative to a section, it has a non-null Sec.
  SectionBase *sec;

  uint64_t val;
  uint64_t alignment = 1;

  // The original st_type if the expression represents a symbol. Any operation
  // resets type to STT_NOTYPE.
  uint8_t type = llvm::ELF::STT_NOTYPE;

  // True if this expression is enclosed in ABSOLUTE().
  // This flag affects the return value of getValue().
  bool forceAbsolute;

  // Original source location. Used for error messages.
  std::string loc;
};

// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function.
using Expr = std::function<ExprValue()>;

// This enum is used to implement linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  AssignmentKind, // . = expr or <sym> = expr
  OutputSectionKind,
  InputSectionKind,
  ByteKind,  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ClassKind, // CLASS(class_name)
};

struct SectionCommand {
  SectionCommand(int k) : kind(k) {}
  int kind;
};

// This represents ". = <expr>" or "<symbol> = <expr>".
struct SymbolAssignment : SectionCommand {
  SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
      : SectionCommand(AssignmentKind), name(name), expression(e),
        symOrder(symOrder), location(loc) {}

  static bool classof(const SectionCommand *c) {
    return c->kind == AssignmentKind;
  }

  // The LHS of an expression. Name is either a symbol name or ".".
  StringRef name;
  Defined *sym = nullptr;

  // The RHS of an expression.
  Expr expression;

  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
  bool provide = false;
  bool hidden = false;

  // This assignment references DATA_SEGMENT_RELRO_END.
  bool dataSegmentRelroEnd = false;

  unsigned symOrder;

  // Holds file name and line number for error reporting.
  std::string location;

  // A string representation of this command. We use this for -Map.
  std::string commandString;

  // Address of this assignment command.
  uint64_t addr;

  // Size of this assignment command. This is usually 0, but if
  // you move '.' this may be greater than 0.
  uint64_t size;
};

// Linker scripts allow additional constraints to be put on output sections.
// If an output section is marked as ONLY_IF_RO, the section is created
// only if its input sections are read-only. Likewise, an output section
// with ONLY_IF_RW is created if all input sections are RW.
enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };

// This struct is used to represent the location and size of regions of
// target memory. Instances of the struct are created by parsing the
// MEMORY command.
struct MemoryRegion {
  MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
               uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
      : name(std::string(name)), origin(origin), length(length), flags(flags),
        invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}

  std::string name;
  Expr origin;
  Expr length;
  // A section can be assigned to the region if any of these ELF section flags
  // are set...
  uint32_t flags;
  // ... or any of these flags are not set.
  // For example, the memory region attribute "r" maps to SHF_WRITE.
  uint32_t invFlags;
  // A section cannot be assigned to the region if any of these ELF section
  // flags are set...
  uint32_t negFlags;
  // ... or any of these flags are not set.
  // For example, the memory region attribute "!r" maps to SHF_WRITE.
  uint32_t negInvFlags;
  uint64_t curPos = 0;

  uint64_t getOrigin() const { return origin().getValue(); }
  uint64_t getLength() const { return length().getValue(); }

  bool compatibleWith(uint32_t secFlags) const {
    if ((secFlags & negFlags) || (~secFlags & negInvFlags))
      return false;
    return (secFlags & flags) || (~secFlags & invFlags);
  }
};

// This struct represents one section match pattern in SECTIONS() command.
// It can optionally have negative match pattern for EXCLUDED_FILE command.
// Also it may be surrounded with SORT() command, so contains sorting rules.
class SectionPattern {
  StringMatcher excludedFilePat;

  // Cache of the most recent input argument and result of excludesFile().
  mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;

public:
  SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
      : excludedFilePat(pat1), sectionPat(pat2),
        sortOuter(SortSectionPolicy::Default),
        sortInner(SortSectionPolicy::Default) {}

  bool excludesFile(const InputFile &file) const;

  StringMatcher sectionPat;
  SortSectionPolicy sortOuter;
  SortSectionPolicy sortInner;
};

class InputSectionDescription : public SectionCommand {
  enum class MatchType { Trivial, WholeArchive, ArchivesExcluded } matchType;
  SingleStringMatcher filePat;

  // Cache of the most recent input argument and result of matchesFile().
  mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;

public:
  InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
                          uint64_t withoutFlags = 0, StringRef classRef = {})
      : SectionCommand(InputSectionKind), matchType(MatchType::Trivial),
        filePat(filePattern), classRef(classRef), withFlags(withFlags),
        withoutFlags(withoutFlags) {
    assert((filePattern.empty() || classRef.empty()) &&
           "file pattern and class reference are mutually exclusive");

    // The matching syntax for whole archives and files outside of an archive
    // can't be handled by SingleStringMatcher, and instead are handled
    // manually within matchesFile()
    if (!filePattern.empty()) {
      if (filePattern.back() == ':') {
        matchType = MatchType::WholeArchive;
        filePat = filePattern.drop_back();
      } else if (filePattern.front() == ':') {
        matchType = MatchType::ArchivesExcluded;
        filePat = filePattern.drop_front();
      }
    }
  }

  static bool classof(const SectionCommand *c) {
    return c->kind == InputSectionKind;
  }

  bool matchesFile(const InputFile &file) const;

  // Input sections that matches at least one of SectionPatterns
  // will be associated with this InputSectionDescription.
  SmallVector<SectionPattern, 0> sectionPatterns;

  // If present, input section matching uses class membership instead of file
  // and section patterns (mutually exclusive).
  StringRef classRef;

  // Includes InputSections and MergeInputSections. Used temporarily during
  // assignment of input sections to output sections.
  SmallVector<InputSectionBase *, 0> sectionBases;

  // Used after the finalizeInputSections() pass. MergeInputSections have been
  // merged into MergeSyntheticSections.
  SmallVector<InputSection *, 0> sections;

  // Temporary record of synthetic ThunkSection instances and the pass that
  // they were created in. This is used to insert newly created ThunkSections
  // into Sections at the end of a createThunks() pass.
  SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;

  // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
  uint64_t withFlags;
  uint64_t withoutFlags;
};

// Represents BYTE(), SHORT(), LONG(), or QUAD().
struct ByteCommand : SectionCommand {
  ByteCommand(Expr e, unsigned size, std::string commandString)
      : SectionCommand(ByteKind), commandString(commandString), expression(e),
        size(size) {}

  static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }

  // Keeps string representing the command. Used for -Map" is perhaps better.
  std::string commandString;

  Expr expression;

  // This is just an offset of this assignment command in the output section.
  unsigned offset;

  // Size of this data command.
  unsigned size;
};

struct InsertCommand {
  SmallVector<StringRef, 0> names;
  bool isAfter;
  StringRef where;
};

// A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
// certain output sections.
struct NoCrossRefCommand {
  SmallVector<StringRef, 0> outputSections;

  // When true, this describes a NOCROSSREFS_TO command that probits references
  // to the first output section from any of the other sections.
  bool toFirst = false;
};

struct PhdrsCommand {
  StringRef name;
  unsigned type = llvm::ELF::PT_NULL;
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  std::optional<unsigned> flags;
  Expr lmaExpr = nullptr;
};

class LinkerScript final {
  // Temporary state used in processSectionCommands() and assignAddresses()
  // that must be reinitialized for each call to the above functions, and must
  // not be used outside of the scope of a call to the above functions.
  struct AddressState {
    AddressState(const LinkerScript &);
    OutputSection *outSec = nullptr;
    MemoryRegion *memRegion = nullptr;
    MemoryRegion *lmaRegion = nullptr;
    uint64_t lmaOffset = 0;
    uint64_t tbssAddr = 0;
    uint64_t overlaySize;
  };

  Ctx &ctx;
  SmallVector<std::unique_ptr<OutputDesc>, 0> descPool;
  llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;

  StringRef getOutputSectionName(const InputSectionBase *s) const;
  void addSymbol(SymbolAssignment *cmd);
  void declareSymbol(SymbolAssignment *cmd);
  void assignSymbol(SymbolAssignment *cmd, bool inSec);
  void setDot(Expr e, const Twine &loc, bool inSec);
  void expandOutputSection(uint64_t size);
  void expandMemoryRegions(uint64_t size);

  SmallVector<InputSectionBase *, 0>
  computeInputSections(const InputSectionDescription *,
                       ArrayRef<InputSectionBase *>, const SectionBase &outCmd);

  SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);

  void discardSynthetic(OutputSection &);

  SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);

  std::pair<MemoryRegion *, MemoryRegion *>
  findMemoryRegion(OutputSection *sec, MemoryRegion *hint);

  bool assignOffsets(OutputSection *sec);

  // This captures the local AddressState and makes it accessible
  // deliberately. This is needed as there are some cases where we cannot just
  // thread the current state through to a lambda function created by the
  // script parser.
  // This should remain a plain pointer as its lifetime is smaller than
  // LinkerScript.
  AddressState *state = nullptr;

  std::unique_ptr<OutputSection> aether;

  uint64_t dot = 0;

public:
  // OutputSection may be incomplete. Avoid inline ctor/dtor.
  LinkerScript(Ctx &ctx);
  ~LinkerScript();

  OutputDesc *createOutputSection(StringRef name, StringRef location);
  OutputDesc *getOrCreateOutputSection(StringRef name);

  bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
  uint64_t getDot() { return dot; }
  void discard(InputSectionBase &s);

  ExprValue getSymbolValue(StringRef name, const Twine &loc);

  void addOrphanSections();
  void diagnoseOrphanHandling() const;
  void diagnoseMissingSGSectionAddress() const;
  void adjustOutputSections();
  void adjustSectionsAfterSorting();

  SmallVector<std::unique_ptr<PhdrEntry>, 0> createPhdrs();
  bool needsInterpSection();

  bool shouldKeep(InputSectionBase *s);
  std::pair<const OutputSection *, const Defined *> assignAddresses();
  bool spillSections();
  void erasePotentialSpillSections();
  void allocateHeaders(SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs);
  void processSectionCommands();
  void processSymbolAssignments();
  void declareSymbols();

  // Used to handle INSERT AFTER statements.
  void processInsertCommands();

  // Describe memory region usage.
  void printMemoryUsage(raw_ostream &os);

  // Record a pending error during an assignAddresses invocation.
  // assignAddresses is executed more than once. Therefore, lld::error should be
  // avoided to not report duplicate errors.
  void recordError(const Twine &msg);

  // Check backward location counter assignment and memory region/LMA overflows.
  void checkFinalScriptConditions() const;

  // Add symbols that are referenced in the linker script to the symbol table.
  // Symbols referenced in a PROVIDE command are only added to the symbol table
  // if the PROVIDE command actually provides the symbol.
  // It also adds the symbols referenced by the used PROVIDE symbols to the
  // linker script referenced symbols list.
  void addScriptReferencedSymbolsToSymTable();

  // Returns true if the PROVIDE symbol should be added to the link.
  // A PROVIDE symbol is added to the link only if it satisfies an
  // undefined reference.
  bool shouldAddProvideSym(StringRef symName);

  // SECTIONS command list.
  SmallVector<SectionCommand *, 0> sectionCommands;

  // PHDRS command list.
  SmallVector<PhdrsCommand, 0> phdrsCommands;

  bool hasSectionsCommand = false;
  bool seenDataAlign = false;
  bool seenRelroEnd = false;
  bool errorOnMissingSection = false;
  SmallVector<SmallString<0>, 0> recordedErrors;

  // List of section patterns specified with KEEP commands. They will
  // be kept even if they are unused and --gc-sections is specified.
  SmallVector<InputSectionDescription *, 0> keptSections;

  // A map from memory region name to a memory region descriptor.
  llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;

  // A list of symbols referenced by the script.
  SmallVector<llvm::StringRef, 0> referencedSymbols;

  // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
  // to be reordered.
  SmallVector<InsertCommand, 0> insertCommands;

  // OutputSections specified by OVERWRITE_SECTIONS.
  SmallVector<OutputDesc *, 0> overwriteSections;

  // NOCROSSREFS(_TO) commands.
  SmallVector<NoCrossRefCommand, 0> noCrossRefs;

  // Sections that will be warned/errored by --orphan-handling.
  SmallVector<const InputSectionBase *, 0> orphanSections;

  // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
  // expression. For example, if the PROVIDE command is:
  //
  // PROVIDE(v = a + b + c);
  //
  // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
  llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
  // Store defined symbols that should ignore PROVIDE commands.
  llvm::DenseSet<Symbol *> unusedProvideSyms;

  // List of potential spill locations (PotentialSpillSection) for an input
  // section.
  struct PotentialSpillList {
    // Never nullptr.
    PotentialSpillSection *head;
    PotentialSpillSection *tail;
  };
  llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;

  // Named lists of input sections that can be collectively referenced in output
  // section descriptions. Multiple references allow for sections to spill from
  // one output section to another.
  llvm::DenseMap<llvm::CachedHashStringRef, SectionClassDesc *> sectionClasses;
};

} // end namespace lld::elf

#endif // LLD_ELF_LINKER_SCRIPT_H