From 961276af2603ada8f9da4e93adc8490230fbbc51 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 3 Jul 2012 08:24:14 +0000 Subject: [PATCH] [Sanitizer] Extend a symbolizer code. Implemented for Linux only. Use dl_iterate_phdr to get virtual addresses of mapped module sections. To symbolize an address from a module, map this module to memory and obtain pointers to debug info sections. Later these pointers can be passed to constructor of DWARF context-in-memory from LLVM DebugInfo lib. llvm-svn: 159652 --- compiler-rt/lib/asan/asan_stack.cc | 10 +- .../lib/sanitizer_common/sanitizer_common.h | 5 + .../lib/sanitizer_common/sanitizer_linux.cc | 87 ++++++++- .../lib/sanitizer_common/sanitizer_mac.cc | 12 ++ .../lib/sanitizer_common/sanitizer_posix.cc | 12 ++ .../lib/sanitizer_common/sanitizer_procmaps.h | 12 +- .../sanitizer_common/sanitizer_symbolizer.cc | 168 ++++++++++-------- .../sanitizer_common/sanitizer_symbolizer.h | 39 ++++ .../lib/sanitizer_common/sanitizer_win.cc | 16 ++ 9 files changed, 283 insertions(+), 78 deletions(-) diff --git a/compiler-rt/lib/asan/asan_stack.cc b/compiler-rt/lib/asan/asan_stack.cc index 1c60565e85ab..38677d9ba9d5 100644 --- a/compiler-rt/lib/asan/asan_stack.cc +++ b/compiler-rt/lib/asan/asan_stack.cc @@ -46,14 +46,20 @@ void AsanStackTrace::PrintStack(uptr *addr, uptr size) { AddressInfo addr_frames[64]; uptr addr_frames_num = 0; if (FLAG_symbolize) { - addr_frames_num = SymbolizeCode(pc, addr_frames, + bool last_frame = (i == size - 1) || !addr[i + 1]; + addr_frames_num = SymbolizeCode(pc - !last_frame, addr_frames, ASAN_ARRAY_SIZE(addr_frames)); } if (addr_frames_num > 0) { for (uptr j = 0; j < addr_frames_num; j++) { AddressInfo &info = addr_frames[j]; AsanPrintf(" #%zu 0x%zx", frame_num, pc); - if (info.module) { + if (info.function) { + AsanPrintf(" %s", info.function); + } + if (info.file) { + AsanPrintf(" %s:%d:%d", info.file, info.line, info.column); + } else if (info.module) { AsanPrintf(" (%s+0x%zx)", info.module, 
info.module_offset); } AsanPrintf("\n"); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index e691f48a6f38..380fe8c75455 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -64,6 +64,11 @@ void Report(const char *format, ...); // Returns the number of read bytes or 0 if file can not be opened. uptr ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size, uptr max_len); +// Maps given file to virtual memory, and returns pointer to it +// (or NULL if the mapping fails). Stores the size of mmapped region +// in '*buff_size'. +void *MapFileToMemory(const char *file_name, uptr *buff_size); + const char *GetEnv(const char *name); const char *GetPwd(); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc index 1208e2304481..f9b0c4e6bf76 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc @@ -16,9 +16,13 @@ #include "sanitizer_common.h" #include "sanitizer_internal_defs.h" #include "sanitizer_libc.h" +#include "sanitizer_placement_new.h" #include "sanitizer_procmaps.h" +#include "sanitizer_symbolizer.h" +#include #include +#include #include #include #include @@ -63,9 +67,15 @@ uptr internal_write(fd_t fd, const void *buf, uptr count) { } uptr internal_filesize(fd_t fd) { - struct stat st = {}; +#if __WORDSIZE == 64 + struct stat st; if (syscall(__NR_fstat, fd, &st)) return -1; +#else + struct stat64 st; + if (syscall(__NR_fstat64, fd, &st)) + return -1; +#endif return (uptr)st.st_size; } @@ -153,6 +163,81 @@ const char *GetEnv(const char *name) { return 0; // Not found. 
} +// ------------------ sanitizer_symbolizer.h +typedef ElfW(Ehdr) Elf_Ehdr; +typedef ElfW(Shdr) Elf_Shdr; + +bool FindDWARFSection(uptr object_file_addr, const char *section_name, + DWARFSection *section) { + Elf_Ehdr *exe = (Elf_Ehdr*)object_file_addr; + Elf_Shdr *sections = (Elf_Shdr*)(object_file_addr + exe->e_shoff); + uptr section_names = object_file_addr + + sections[exe->e_shstrndx].sh_offset; + for (int i = 0; i < exe->e_shnum; i++) { + Elf_Shdr *current_section = &sections[i]; + const char *current_name = (const char*)section_names + + current_section->sh_name; + if (IsFullNameOfDWARFSection(current_name, section_name)) { + section->data = (const char*)object_file_addr + + current_section->sh_offset; + section->size = current_section->sh_size; + return true; + } + } + return false; +} + +#ifdef ANDROID +uptr GetListOfModules(ModuleDIContext *modules, uptr max_modules) { + UNIMPLEMENTED(); +} +#else // ANDROID +struct DlIteratePhdrData { + ModuleDIContext *modules; + uptr current_n; + uptr max_n; +}; + +static const uptr kMaxPathLength = 512; + +static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { + DlIteratePhdrData *data = (DlIteratePhdrData*)arg; + if (data->current_n == data->max_n) + return 0; + char *module_name = 0; + if (data->current_n == 0) { + // First module is the binary itself. 
+ module_name = (char*)InternalAlloc(kMaxPathLength); + uptr module_name_len = readlink("/proc/self/exe", + module_name, kMaxPathLength); + CHECK_NE(module_name_len, (uptr)-1); + CHECK_LT(module_name_len, kMaxPathLength); + module_name[module_name_len] = '\0'; + } else if (info->dlpi_name) { + module_name = internal_strdup(info->dlpi_name); + } + if (module_name == 0 || module_name[0] == '\0') + return 0; + void *mem = &data->modules[data->current_n]; + ModuleDIContext *cur_module = new(mem) ModuleDIContext(module_name); + data->current_n++; + for (int i = 0; i < info->dlpi_phnum; i++) { + uptr cur_beg = info->dlpi_addr + info->dlpi_phdr[i].p_vaddr; + uptr cur_end = cur_beg + info->dlpi_phdr[i].p_memsz; + cur_module->addAddressRange(cur_beg, cur_end); + } + InternalFree(module_name); + return 0; +} + +uptr GetListOfModules(ModuleDIContext *modules, uptr max_modules) { + CHECK(modules); + DlIteratePhdrData data = {modules, 0, max_modules}; + dl_iterate_phdr(dl_iterate_phdr_cb, &data); + return data.current_n; +} +#endif // ANDROID + // ----------------- sanitizer_procmaps.h ProcessMaps::ProcessMaps() { proc_self_maps_buff_len_ = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc index 38fd08162373..c6e3bd6fe26a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc @@ -18,6 +18,7 @@ #include "sanitizer_internal_defs.h" #include "sanitizer_libc.h" #include "sanitizer_procmaps.h" +#include "sanitizer_symbolizer.h" #include // for _NSGetEnviron #include @@ -106,6 +107,17 @@ const char *GetEnv(const char *name) { return 0; } +// ------------------ sanitizer_symbolizer.h +bool FindDWARFSection(uptr object_file_addr, const char *section_name, + DWARFSection *section) { + UNIMPLEMENTED(); + return false; +} + +uptr GetListOfModules(ModuleDIContext *modules, uptr max_modules) { + UNIMPLEMENTED(); +}; + // ----------------- sanitizer_procmaps.h 
ProcessMaps::ProcessMaps() { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cc b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cc index 2b4bef5ce0cf..a6d56838a93d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cc @@ -76,6 +76,18 @@ void *Mprotect(uptr fixed_addr, uptr size) { -1, 0); } +void *MapFileToMemory(const char *file_name, uptr *buff_size) { + fd_t fd = internal_open(file_name, false); + CHECK_NE(fd, kInvalidFd); + uptr fsize = internal_filesize(fd); + CHECK_NE(fsize, (uptr)-1); + CHECK_GT(fsize, 0); + *buff_size = RoundUpTo(fsize, kPageSize); + void *map = internal_mmap(0, *buff_size, PROT_READ, MAP_PRIVATE, fd, 0); + return (map == MAP_FAILED) ? 0 : map; +} + + static inline bool IntervalsAreSeparate(uptr start1, uptr end1, uptr start2, uptr end2) { CHECK(start1 <= end1); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h index 15dc69852399..e7f9cac6cf6c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h @@ -41,7 +41,17 @@ class ProcessMaps { for (int i = 0; Next(&start, &end, &file_offset, filename, filename_size); i++) { if (addr >= start && addr < end) { - // Don't subtract 'start' for the first entry. Don't ask me why. + // Don't subtract 'start' for the first entry: + // * If a binary is compiled w/o -pie, then the first entry in + // process maps is likely the binary itself (all dynamic libs + // are mapped higher in address space). For such a binary, + // instruction offset in binary coincides with the actual + // instruction address in virtual memory (as code section + // is mapped to a fixed memory range). + // * If a binary is compiled with -pie, all the modules are + // mapped high at address space (in particular, higher than + // shadow memory of the tool), so the module can't be the + // first entry. 
*offset = (addr - (i ? start : 0)) + file_offset; return true; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cc b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cc index d7bb65294ef4..438d4c401b4c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cc @@ -19,6 +19,18 @@ namespace __sanitizer { +bool IsFullNameOfDWARFSection(const char *full_name, const char *short_name) { + // Skip "__DWARF," prefix. + if (0 == internal_strncmp(full_name, "__DWARF,", 8)) { + full_name += 8; + } + // Skip . and _ prefixes. + while (*full_name == '.' || *full_name == '_') { + full_name++; + } + return 0 == internal_strcmp(full_name, short_name); +} + void AddressInfo::Clear() { InternalFree(module); InternalFree(function); @@ -26,97 +38,105 @@ void AddressInfo::Clear() { internal_memset(this, 0, sizeof(AddressInfo)); } -static const int kMaxModuleNameLength = 4096; - -struct ModuleDesc { - ModuleDesc *next; - uptr start; - uptr end; - uptr offset; - char *full_name; - char *name; - - ModuleDesc(uptr _start, uptr _end, uptr _offset, const char *module_name) { - next = 0; - start = _start; - end = _end; - offset = _offset; - full_name = internal_strdup(module_name); - name = internal_strrchr(module_name, '/'); - if (name == 0) { - name = full_name; - } else { - name++; - } +ModuleDIContext::ModuleDIContext(const char *module_name) { + full_name_ = internal_strdup(module_name); + short_name_ = internal_strrchr(module_name, '/'); + if (short_name_ == 0) { + short_name_ = full_name_; + } else { + short_name_++; } -}; + base_address_ = (uptr)-1; + n_ranges_ = 0; + mapped_addr_ = 0; + mapped_size_ = 0; +} + +void ModuleDIContext::addAddressRange(uptr beg, uptr end) { + CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); + ranges_[n_ranges_].beg = beg; + ranges_[n_ranges_].end = end; + base_address_ = Min(base_address_, beg); + n_ranges_++; +} + +bool ModuleDIContext::containsAddress(uptr 
address) const { + for (uptr i = 0; i < n_ranges_; i++) { + if (ranges_[i].beg <= address && address < ranges_[i].end) + return true; + } + return false; +} + +void ModuleDIContext::getAddressInfo(AddressInfo *info) { + info->module = internal_strdup(full_name_); + info->module_offset = info->address - base_address_; + if (mapped_addr_ == 0) + CreateDIContext(); + // FIXME: Use the actual debug info context here. + info->function = 0; + info->file = 0; + info->line = 0; + info->column = 0; +} + +void ModuleDIContext::CreateDIContext() { + mapped_addr_ = (uptr)MapFileToMemory(full_name_, &mapped_size_); + CHECK(mapped_addr_); + DWARFSection debug_info; + DWARFSection debug_abbrev; + DWARFSection debug_line; + DWARFSection debug_aranges; + DWARFSection debug_str; + FindDWARFSection(mapped_addr_, "debug_info", &debug_info); + FindDWARFSection(mapped_addr_, "debug_abbrev", &debug_abbrev); + FindDWARFSection(mapped_addr_, "debug_line", &debug_line); + FindDWARFSection(mapped_addr_, "debug_aranges", &debug_aranges); + FindDWARFSection(mapped_addr_, "debug_str", &debug_str); + // FIXME: Construct actual debug info context using mapped_addr, + // mapped_size and pointers to DWARF sections in memory. 
+} class Symbolizer { public: - void GetModuleDescriptions() { - ProcessMaps proc_maps; - uptr start, end, offset; - char *module_name = (char*)InternalAlloc(kMaxModuleNameLength); - ModuleDesc *prev_module = 0; - while (proc_maps.Next(&start, &end, &offset, module_name, - kMaxModuleNameLength)) { - void *mem = InternalAlloc(sizeof(ModuleDesc)); - ModuleDesc *cur_module = new(mem) ModuleDesc(start, end, offset, - module_name); - if (!prev_module) { - modules_ = cur_module; - } else { - prev_module->next = cur_module; - } - prev_module = cur_module; - } - InternalFree(module_name); - } - uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { if (max_frames == 0) return 0; AddressInfo *info = &frames[0]; info->Clear(); info->address = addr; - if (modules_ == 0) { - GetModuleDescriptions(); - } - bool first = true; - for (ModuleDesc *module = modules_; module; module = module->next) { - if (addr >= module->start && addr < module->end) { - info->module = internal_strdup(module->full_name); - // Don't subtract 'start' for the first entry: - // * If a binary is compiled w/o -pie, then the first entry in - // process maps is likely the binary itself (all dynamic libs - // are mapped higher in address space). For such a binary, - // instruction offset in binary coincides with the actual - // instruction address in virtual memory (as code section - // is mapped to a fixed memory range). - // * If a binary is compiled with -pie, all the modules are - // mapped high at address space (in particular, higher than - // shadow memory of the tool), so the module can't be the - // first entry. - info->module_offset = (addr - (first ? 0 : module->start)) + - module->offset; - // FIXME: Fill other fields here as well: create debug - // context for a given module and fetch file/line info from it. 
- info->function = 0; - info->file = 0; - info->line = 0; - info->column = 0; - return 1; - } - first = false; + ModuleDIContext *module = FindModuleForAddress(addr); + if (module) { + module->getAddressInfo(info); + return 1; } return 0; } private: - ModuleDesc *modules_; // List of module descriptions is leaked. + ModuleDIContext *FindModuleForAddress(uptr address) { + if (modules_ == 0) { + modules_ = (ModuleDIContext*)InternalAlloc( + kMaxNumberOfModuleContexts * sizeof(ModuleDIContext)); + CHECK(modules_); + n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts); + CHECK_GT(n_modules_, 0); + CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); + } + for (uptr i = 0; i < n_modules_; i++) { + if (modules_[i].containsAddress(address)) { + return &modules_[i]; + } + } + return 0; + } + static const uptr kMaxNumberOfModuleContexts = 256; + // Array of module debug info contexts is leaked. + ModuleDIContext *modules_; + uptr n_modules_; }; -static Symbolizer symbolizer; +static Symbolizer symbolizer; // Linker initialized. uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { return symbolizer.SymbolizeCode(address, frames, max_frames); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h index c6104cd5d4f0..616e32e64650 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h @@ -52,6 +52,45 @@ struct AddressInfo { // This function should NOT be called from two threads simultaneously. uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames); +// Debug info routines +struct DWARFSection { + const char *data; + uptr size; +}; +// Returns true on success. 
+bool FindDWARFSection(uptr object_file_addr, const char *section_name, + DWARFSection *section); +bool IsFullNameOfDWARFSection(const char *full_name, const char *short_name); + +class ModuleDIContext { + public: + explicit ModuleDIContext(const char *module_name); + void addAddressRange(uptr beg, uptr end); + bool containsAddress(uptr address) const; + void getAddressInfo(AddressInfo *info); + + const char *full_name() const { return full_name_; } + + private: + void CreateDIContext(); + + struct AddressRange { + uptr beg; + uptr end; + }; + char *full_name_; + char *short_name_; + uptr base_address_; + static const uptr kMaxNumberOfAddressRanges = 16; + AddressRange ranges_[kMaxNumberOfAddressRanges]; + uptr n_ranges_; + uptr mapped_addr_; + uptr mapped_size_; +}; + +// OS-dependent function that fills 'modules' with the list of loaded modules. +uptr GetListOfModules(ModuleDIContext *modules, uptr max_modules); + } // namespace __sanitizer #endif // SANITIZER_SYMBOLIZER_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cc b/compiler-rt/lib/sanitizer_common/sanitizer_win.cc index 04622fa510be..c68a1fee4068 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cc @@ -16,6 +16,7 @@ #include "sanitizer_common.h" #include "sanitizer_libc.h" +#include "sanitizer_symbolizer.h" namespace __sanitizer { @@ -75,6 +76,10 @@ bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) { return true; } +void *MapFileToMemory(const char *file_name, uptr *buff_size) { + UNIMPLEMENTED(); +} + const char *GetEnv(const char *name) { static char env_buffer[32767] = {}; @@ -125,6 +130,17 @@ int Atexit(void (*function)(void)) { return atexit(function); } +// ------------------ sanitizer_symbolizer.h +bool FindDWARFSection(uptr object_file_addr, const char *section_name, + DWARFSection *section) { + UNIMPLEMENTED(); + return false; +} + +uptr GetListOfModules(ModuleDIContext *modules, uptr max_modules) { + 
UNIMPLEMENTED(); +}; + // ------------------ sanitizer_libc.h void *internal_mmap(void *addr, uptr length, int prot, int flags, int fd, u64 offset) {