//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// // // The LLVM Compiler Infrastructure // // This file was developed by Chris Lattner and is distributed under // the University of Illinois Open Source License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the IdentifierTokenInfo and IdentifierTable interfaces. // //===----------------------------------------------------------------------===// #include "clang/Lex/IdentifierTable.h" #include "clang/Lex/MacroInfo.h" #include using namespace llvm; using namespace clang; //===----------------------------------------------------------------------===// // IdentifierTokenInfo Implementation //===----------------------------------------------------------------------===// void IdentifierTokenInfo::Destroy() { delete Macro; } //===----------------------------------------------------------------------===// // Memory Allocation Support //===----------------------------------------------------------------------===// /// The identifier table has a very simple memory allocation pattern: it just /// keeps allocating identifiers, then never frees them unless it frees them /// all. As such, we use a simple bump-pointer memory allocator to make /// allocation speedy. Shark showed that malloc was 27% of the time spent in /// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this. #define USE_ALLOCATOR 1 #if USE_ALLOCATOR namespace { class MemRegion { unsigned RegionSize; MemRegion *Next; char *NextPtr; public: void Init(unsigned size, MemRegion *next) { RegionSize = size; Next = next; NextPtr = (char*)(this+1); // FIXME: uses GCC extension. unsigned Alignment = __alignof__(IdentifierTokenInfo); NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) & ~(intptr_t)(Alignment-1)); } const MemRegion *getNext() const { return Next; } unsigned getNumBytesAllocated() const { return NextPtr-(const char*)this; } /// Allocate - Allocate and return at least the specified number of bytes. /// void *Allocate(unsigned AllocSize, MemRegion **RegPtr) { // FIXME: uses GCC extension. unsigned Alignment = __alignof__(IdentifierTokenInfo); // Round size up to an even multiple of the alignment. AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1); // If there is space in this region for the identifier, return it. if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) { void *Result = NextPtr; NextPtr += AllocSize; return Result; } // Otherwise, we have to allocate a new chunk. Create one twice as big as // this one. MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2); NewRegion->Init(RegionSize*2, this); // Update the current "first region" pointer to point to the new region. *RegPtr = NewRegion; // Try allocating from it now. return NewRegion->Allocate(AllocSize, RegPtr); } /// Deallocate - Release all memory for this region to the system. /// void Deallocate() { MemRegion *next = Next; free(this); if (next) next->Deallocate(); } }; } #endif //===----------------------------------------------------------------------===// // IdentifierTable Implementation //===----------------------------------------------------------------------===// /// IdentifierLink - There is one of these allocated by IdentifierTokenInfo. /// These form the linked list of buckets for the hash table. struct IdentifierBucket { /// Next - This is the next bucket in the linked list. IdentifierBucket *Next; IdentifierTokenInfo TokInfo; // NOTE: TokInfo must be the last element in this structure, as the string // information for the identifier is allocated right after it. }; // FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3. static unsigned HASH_TABLE_SIZE = 8096; IdentifierTable::IdentifierTable() { IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE](); TheTable = TableArray; NumIdentifiers = 0; #if USE_ALLOCATOR TheMemory = malloc(8*4096); ((MemRegion*)TheMemory)->Init(8*4096, 0); #endif memset(TheTable, 0, HASH_TABLE_SIZE*sizeof(IdentifierBucket*)); } IdentifierTable::~IdentifierTable() { IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) { IdentifierBucket *Id = TableArray[i]; while (Id) { // Free memory referenced by the identifier (e.g. macro info). Id->TokInfo.Destroy(); IdentifierBucket *Next = Id->Next; #if !USE_ALLOCATOR free(Id); #endif Id = Next; } } #if USE_ALLOCATOR ((MemRegion*)TheMemory)->Deallocate(); #endif delete [] TableArray; } /// HashString - Compute a hash code for the specified string. /// static unsigned HashString(const char *Start, const char *End) { unsigned int Result = 0; // Perl hash function. while (Start != End) Result = Result * 33 + *Start++; Result = Result + (Result >> 5); return Result; } IdentifierTokenInfo &IdentifierTable::get(const char *NameStart, const char *NameEnd) { IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; unsigned Hash = HashString(NameStart, NameEnd) % HASH_TABLE_SIZE; unsigned Length = NameEnd-NameStart; IdentifierBucket *IdentHead = TableArray[Hash]; for (IdentifierBucket *Identifier = IdentHead; Identifier; Identifier = Identifier->Next) { if (Identifier->TokInfo.getNameLength() == Length && memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0) return Identifier->TokInfo; } // Allocate a new identifier, with space for the null-terminated string at the // end. unsigned AllocSize = sizeof(IdentifierBucket)+Length+1; #if USE_ALLOCATOR IdentifierBucket *Identifier = (IdentifierBucket*) ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory); #else IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize); #endif Identifier->TokInfo.NameLen = Length; Identifier->TokInfo.Macro = 0; Identifier->TokInfo.TokenID = tok::identifier; Identifier->TokInfo.IsExtension = false; Identifier->TokInfo.IsPoisoned = false; Identifier->TokInfo.FETokenInfo = 0; // Copy the string information. char *StrBuffer = (char*)(Identifier+1); memcpy(StrBuffer, NameStart, Length); StrBuffer[Length] = 0; // Null terminate string. // Link it into the hash table. Identifier->Next = IdentHead; TableArray[Hash] = Identifier; return Identifier->TokInfo; } IdentifierTokenInfo &IdentifierTable::get(const std::string &Name) { // Don't use c_str() here: no need to be null terminated. const char *NameBytes = &Name[0]; unsigned Size = Name.size(); return get(NameBytes, NameBytes+Size); } /// PrintStats - Print statistics about how well the identifier table is doing /// at hashing identifiers. void IdentifierTable::PrintStats() const { unsigned NumIdentifiers = 0; unsigned NumEmptyBuckets = 0; unsigned MaxBucketLength = 0; unsigned AverageIdentifierSize = 0; unsigned MaxIdentifierLength = 0; IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) { unsigned NumIdentifiersInBucket = 0; for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) { AverageIdentifierSize += Id->TokInfo.getNameLength(); if (MaxIdentifierLength < Id->TokInfo.getNameLength()) MaxIdentifierLength = Id->TokInfo.getNameLength(); ++NumIdentifiersInBucket; } if (NumIdentifiersInBucket > MaxBucketLength) MaxBucketLength = NumIdentifiersInBucket; if (NumIdentifiersInBucket == 0) ++NumEmptyBuckets; NumIdentifiers += NumIdentifiersInBucket; } std::cerr << "\n*** Identifier Table Stats:\n"; std::cerr << "# Identifiers: " << NumIdentifiers << "\n"; std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n"; std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n"; std::cerr << "Hash density (#identifiers per bucket): " << NumIdentifiers/(double)HASH_TABLE_SIZE << "\n"; std::cerr << "Nonempty hash density (average chain length): " << NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n"; std::cerr << "Ave identifier length: " << (AverageIdentifierSize/(double)NumIdentifiers) << "\n"; std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n"; // Compute statistics about the memory allocated for identifiers. #if USE_ALLOCATOR unsigned BytesUsed = 0; unsigned NumRegions = 0; const MemRegion *R = (MemRegion*)TheMemory; for (; R; R = R->getNext(), ++NumRegions) { BytesUsed += R->getNumBytesAllocated(); } std::cerr << "\nNumber of memory regions: " << NumRegions << "\n"; std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n"; #endif }