add initial support for coalescing by content (c-strings) with test case

llvm-svn: 147799
2025-04-16 17:46:40 +00:00 · 2012-01-09 20:18:15 +00:00 · 2012-01-09 20:18:15 +00:00 · bfedfc171d
commit bfedfc171d
parent a8f80b31f9
5 changed files with 204 additions and 26 deletions
--- a/lld/include/lld/Core/SymbolTable.h
+++ b/lld/include/lld/Core/SymbolTable.h
@ -14,7 +14,9 @@
 #include <map>
 #include <vector>

-namespace llvm { class StringRef; }
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/DenseSet.h"
+

 namespace lld {

@ -52,12 +54,21 @@ public:
 private:
  typedef std::map<llvm::StringRef, const Atom *> NameToAtom;
  typedef std::map<const Atom *, const Atom *> AtomToAtom;
+  struct MyMappingInfo { // key traits given to llvm::DenseSet for AtomContentSet: atoms compare by content
+    static const Atom * getEmptyKey() { return NULL; } // sentinel key; isEqual() only matches it against itself
+    static const Atom * getTombstoneKey() { return (Atom*)(-1); } // second sentinel key, likewise never equal to an atom
+    static unsigned getHashValue(const Atom * const Val); // hash built from size, content type and content bytes
+    static bool isEqual(const Atom * const LHS, const Atom * const RHS); // same pointer, or same type/size/bytes
+  };
+  typedef llvm::DenseSet<const Atom*, MyMappingInfo> AtomContentSet;

  void addByName(const Atom &);
+  void addByContent(const Atom &);

  Platform&  _platform;
  AtomToAtom _replacedAtoms;
  NameToAtom _nameTable;
+  AtomContentSet _contentTable;
 };

 } // namespace lld
--- a/lld/lib/Core/SymbolTable.cpp
+++ b/lld/lib/Core/SymbolTable.cpp
@ -16,6 +16,8 @@
 #include "lld/Platform/Platform.h"

 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ArrayRef.h"

 #include <algorithm>
 #include <cassert>
@ -34,7 +36,7 @@ void SymbolTable::add(const Atom &atom) {
    this->addByName(atom);
  }
  else if ( atom.mergeDuplicates() ) {
-    // TO DO: support constants merging
+    this->addByContent(atom);
  }
 }

@ -130,6 +132,58 @@ void SymbolTable::addByName(const Atom & newAtom) {
  }
 }

+// Content hash for AtomContentSet: low 24 bits mix the atom's size and bytes, top byte is its content type.
+unsigned SymbolTable::MyMappingInfo::getHashValue(const Atom * const atom) {
+  unsigned hash = atom->size(); // seed with the size, so zero-fill atoms still hash by size
+  if ( atom->contentType() != Atom::typeZeroFill ) { // content of zero-fill atoms is not read
+    llvm::ArrayRef<uint8_t> content = atom->rawContent();
+    for (unsigned int i=0; i < content.size(); ++i) {
+      hash = hash * 33 + content[i]; // multiply-by-33 accumulation over every content byte
+    }
+  }
+  hash &= 0x00FFFFFF; // keep the low 24 bits, clearing the top byte
+  hash |= ((unsigned)atom->contentType()) << 24; // place the content type in the top byte
+  //fprintf(stderr, "atom=%p, hash=0x%08X\n", atom, hash);
+  return hash;
+}
+
+// Equality for AtomContentSet: true for the same pointer, or for two atoms with equal type, size and bytes.
+bool SymbolTable::MyMappingInfo::isEqual(const Atom * const l, 
+                                         const Atom * const r) {
+  if ( l == r ) // also covers sentinel-vs-same-sentinel comparisons
+    return true;
+  if ( l == getEmptyKey() ) // sentinel keys are rejected here, before anything is dereferenced below
+    return false;
+  if ( r == getEmptyKey() )
+    return false;
+  if ( l == getTombstoneKey() )
+    return false;
+  if ( r == getTombstoneKey() )
+    return false;
+    
+  if ( l->contentType() != r->contentType() ) // cheap checks first: type, then size
+    return false;
+  if ( l->size() != r->size() )
+    return false;
+  llvm::ArrayRef<uint8_t> lc = l->rawContent();
+  llvm::ArrayRef<uint8_t> rc = r->rawContent();
+  return lc.equals(rc); // final decision is the comparison of the two content arrays
+}
+
+// Record newAtom in the content table, or map it to the equal-content atom that is already recorded.
+void SymbolTable::addByContent(const Atom & newAtom) {
+  AtomContentSet::iterator pos = _contentTable.find(&newAtom); // lookup goes through MyMappingInfo hash/isEqual
+  if ( pos == _contentTable.end() ) {
+    _contentTable.insert(&newAtom); // no atom with this content yet: remember this one
+    return;
+  }
+  const Atom* existing = *pos; // the previously recorded atom with equal content
+    // New atom is not being used.  Add it to replacement table.
+    _replacedAtoms[&newAtom] = existing; // reached only when a match was found (early return above)
+}
+
+
+
 const Atom *SymbolTable::findByName(llvm::StringRef sym) {
  NameToAtom::iterator pos = _nameTable.find(sym);
  if (pos == _nameTable.end())
--- a/lld/lib/Core/YamlReader.cpp
+++ b/lld/lib/Core/YamlReader.cpp
@ -16,6 +16,7 @@

 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@ -66,18 +67,21 @@ inline llvm::error_code make_error_code(yaml_reader_errors e) {
 class YAML {
 public:
  struct Entry { // one key/value record produced by YAML::parse
-    Entry(const char *k, const char *v, int d, bool bd, bool bs)
+    Entry(const char *k, const char *v, std::vector<uint8_t>* vs, 
+          int d, bool bd, bool bs)
      : key(strdup(k))
-      , value(strdup(v))
+      , value(v ? strdup(v) : NULL) // v may be NULL (parse passes NULL for "[ ... ]" values), so guard strdup
+      , valueSequenceBytes(vs) // the pointer is stored as-is; the vector is not copied
      , depth(d)
      , beginSequence(bs)
      , beginDocument(bd) {}

-    const char *key;
-    const char *value;
-    int         depth;
-    bool        beginSequence;
-    bool        beginDocument;
+    const char *          key; // strdup'd copy of k
+    const char *          value; // strdup'd copy of v, or NULL if v was NULL
+    std::vector<uint8_t>* valueSequenceBytes; // bytes parsed from a "[ ... ]" value; NULL for scalar values
+    int                   depth;
+    bool                  beginSequence;
+    bool                  beginDocument;
  };

  static void parse(llvm::MemoryBuffer *mb, std::vector<const Entry *>&);
@ -107,6 +111,8 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
  int depth = 0;
  bool nextKeyIsStartOfDocument = false;
  bool nextKeyIsStartOfSequence = false;
+  std::vector<uint8_t>* sequenceBytes = NULL;
+  unsigned contentByte = 0;
  for (const char *s = mb->getBufferStart(); s < mb->getBufferEnd(); ++s) {
    char c = *s;
    if (c == '\n')
@ -204,7 +210,7 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
        *p++ = c;
        state = inValue;
      } else if (c == '\n') {
-        entries.push_back(new Entry(key, "", depth,
+        entries.push_back(new Entry(key, "", NULL, depth,
                                    nextKeyIsStartOfDocument,
                                    nextKeyIsStartOfSequence));
        nextKeyIsStartOfSequence = false;
@ -212,6 +218,8 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
        state = inDocument;
        depth = 0;
      } else if (c == '[') {
+        contentByte = 0;
+        sequenceBytes = new std::vector<uint8_t>();
        state = inValueSequence;
      } else if (c == ' ') {
        // eat space
@ -226,7 +234,7 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
        *p++ = c;
      } else if (c == '\n') {
        *p = '\0';
-        entries.push_back(new Entry(key, value, depth,
+        entries.push_back(new Entry(key, value, NULL, depth,
                                    nextKeyIsStartOfDocument,
                                    nextKeyIsStartOfSequence));
        nextKeyIsStartOfSequence = false;
@ -236,11 +244,33 @@ void YAML::parse(llvm::MemoryBuffer *mb, std::vector<const Entry *> &entries) {
      }
      break;
    case inValueSequence:
-      if (c == ']')
+      if (c == ']') {
+        sequenceBytes->push_back(contentByte);
        state = inValueSequenceEnd;
+      }
+      else if (c == ' ') {
+        // eat white space
+      }
+      else if (c == ',') {
+        sequenceBytes->push_back(contentByte);
+      }
+      else if ( isdigit(c) ) {
+        contentByte = (contentByte << 4) | (c-'0');
+      } 
+      else if ( ('a' <= tolower(c)) && (tolower(c) <= 'f') ) {
+        contentByte = (contentByte << 4) | (tolower(c)-'a'+10);
+      }
+      else {
+        llvm::report_fatal_error("non-hex digit found in content [ ]");
+      }
      break;
    case inValueSequenceEnd:
      if (c == '\n') {
+        entries.push_back(new Entry(key, NULL, sequenceBytes, depth,
+                                    nextKeyIsStartOfDocument,
+                                    nextKeyIsStartOfSequence));
+        nextKeyIsStartOfSequence = false;
+        nextKeyIsStartOfDocument = false;
        state = inDocument;
        depth = 0;
      }
@ -296,11 +326,13 @@ public:
          , YAMLFile& f
          , const char *n
          , const char* sn
-          , uint64_t sz)
+          , uint64_t sz
+          , std::vector<uint8_t>* c)
    : Atom(ord, d, s, ct, sc, intn, md, ah, dsk, tb, al, a)
    , _file(f)
    , _name(n)
    , _sectionName(sn)
+    , _content(c)
    , _size(sz)
    , _refStartIndex(f._lastRefIndex)
    , _refEndIndex(f._references.size()) {
@ -320,7 +352,7 @@ public:
  }
  
  virtual llvm::StringRef customSectionName() const {
-    return _sectionName;
+    return (_sectionName ? _sectionName : llvm::StringRef()); // default-constructed StringRef when _sectionName is NULL
  }

  virtual uint64_t objectAddress() const {
@ -328,19 +360,26 @@ public:
  }

  virtual uint64_t size() const {
-    return _size;
+    return (_content ? _content->size() : _size); // explicit content bytes take precedence over the stored _size
  }

-  virtual void copyRawContent(uint8_t buffer[]) const { }
+  llvm::ArrayRef<uint8_t> rawContent() const { // content bytes of this atom; empty ArrayRef when none were given
+    if ( _content != NULL ) 
+      return llvm::ArrayRef<uint8_t>(*_content); // built from the _content vector
+    else
+      return llvm::ArrayRef<uint8_t>(); // no content was set on this atom
+  }
+  
  virtual Reference::iterator referencesBegin() const;
  virtual Reference::iterator referencesEnd() const;
 private:
-  YAMLFile&      _file;
-  const char *   _name;
-  const char *   _sectionName;
-  unsigned long  _size;
-  unsigned int   _refStartIndex;
-  unsigned int   _refEndIndex;
+  YAMLFile&             _file;
+  const char *          _name;
+  const char *          _sectionName;
+  std::vector<uint8_t>* _content;
+  unsigned long         _size;
+  unsigned int          _refStartIndex;
+  unsigned int          _refEndIndex;
 };

 Reference::iterator YAMLAtom::referencesBegin() const {
@ -384,6 +423,7 @@ public:
  bool _alias;
  bool _autoHide;
  const char *_sectionName;
+  std::vector<uint8_t>* _content;
  Reference _ref;
 };

@ -395,13 +435,15 @@ YAMLAtomState::YAMLAtomState()
  , _type(KeyValues::contentTypeDefault)
  , _scope(KeyValues::scopeDefault)
  , _def(KeyValues::definitionDefault)
+  , _sectionChoice(KeyValues::sectionChoiceDefault)
  , _internalName(KeyValues::internalNameDefault)
  , _mergeDuplicates(KeyValues::mergeDuplicatesDefault)
  , _deadStrip(KeyValues::deadStripKindDefault)
  , _thumb(KeyValues::isThumbDefault)
  , _alias(KeyValues::isAliasDefault) 
  , _autoHide(KeyValues::autoHideDefault)
-  , _sectionName(NULL) {
+  , _sectionName(NULL)
+  , _content(NULL) {
  _ref.target       = NULL;
  _ref.addend       = 0;
  _ref.offsetInAtom = 0;
@ -413,7 +455,7 @@ void YAMLAtomState::makeAtom(YAMLFile& f) {
  Atom *a = new YAMLAtom(_ordinal, _def, _scope, _type, _sectionChoice,
                         _internalName, _mergeDuplicates, _autoHide,  
                         _deadStrip, _thumb, _alias, _align, f, 
-                         _name, _sectionName, _size);
+                         _name, _sectionName, _size, _content);

  f._atoms.push_back(a);
  ++_ordinal;
@ -433,6 +475,7 @@ void YAMLAtomState::makeAtom(YAMLFile& f) {
  _alias            = KeyValues::isAliasDefault;
  _autoHide         = KeyValues::autoHideDefault;
  _sectionName      = NULL;
+  _content          = NULL;
  _ref.target       = NULL;
  _ref.addend       = 0;
  _ref.offsetInAtom = 0;
@ -593,7 +636,7 @@ llvm::error_code parseObjectText( llvm::MemoryBuffer *mb
          haveAtom = true;
        } 
        else if (strcmp(entry->key, KeyValues::contentKeyword) == 0) {
-          // TO DO: switch to content mode
+          atomState._content = entry->valueSequenceBytes;
          haveAtom = true;
        } 
        else if (strcmp(entry->key, "align2") == 0) {
--- a/lld/lib/Core/YamlWriter.cpp
+++ b/lld/lib/Core/YamlWriter.cpp
@ -15,6 +15,8 @@
 #include "lld/Core/Reference.h"

 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/system_error.h"
@ -141,6 +143,24 @@ public:
    }

     
+    if ( atom.contentType() != Atom::typeZeroFill ) {
+      _out  << "      " 
+            << KeyValues::contentKeyword 
+            << ":"
+            << spacePadding(KeyValues::contentKeyword)
+            << "[ ";
+      llvm::ArrayRef<uint8_t> arr = atom.rawContent();
+      bool needComma = false;
+      for (unsigned int i=0; i < arr.size(); ++i) {
+        if ( needComma )
+          _out << ", ";
+        _out << hexdigit(arr[i] >> 4);
+        _out << hexdigit(arr[i] & 0x0F);
+        needComma = true;
+      }
+      _out << " ]\n";
+    }
+
    if (atom.referencesBegin() != atom.referencesEnd()) {
      _out << "      fixups:\n";
      for (Reference::iterator it = atom.referencesBegin(),
@ -160,7 +180,12 @@ private:
    return &spaces[strlen(key)];
  }

-
+  char hexdigit(uint8_t nibble) { // maps 0..9 to '0'..'9' and 10..15 to 'A'..'F' (uppercase)
+    if ( nibble < 0x0A )
+      return '0' + nibble;
+    else
+      return 'A' + nibble - 0x0A; // the content writer passes only 4-bit values (byte >> 4, byte & 0x0F)
+  }

  llvm::raw_ostream&  _out;
  bool                _firstAtom;
--- a/lld/test/cstring-coalesce.objtxt
+++ b/lld/test/cstring-coalesce.objtxt
@ -0,0 +1,45 @@
+# RUN: lld-core %s | FileCheck %s
+
+#
+# Test that duplicate c-strings are coalesced
+#
+
+---
+atoms:
+    - name:              L0
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 68, 65, 6c, 6c, 6f, 00 ]
+      
+    - name:              L1
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 74, 68, 65, 72, 65, 00 ]
+---
+atoms:
+    - name:              L2
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 68, 65, 6c, 6c, 6f, 00 ]
+---
+atoms:
+    - name:              L2
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 74, 68, 65, 72, 65, 00 ]
+...
+
+# CHECK:       type:       c-string
+# CHECK:       content:    [ 68, 65, 6C, 6C, 6F, 00 ]
+# CHECK:       type:       c-string
+# CHECK:       content:    [ 74, 68, 65, 72, 65, 00 ]
+# CHECK-NOT:   name:
+# CHECK:       ...