michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: michael@0: // Copyright (c) 2006, 2011, 2012 Google Inc. michael@0: // All rights reserved. michael@0: // michael@0: // Redistribution and use in source and binary forms, with or without michael@0: // modification, are permitted provided that the following conditions are michael@0: // met: michael@0: // michael@0: // * Redistributions of source code must retain the above copyright michael@0: // notice, this list of conditions and the following disclaimer. michael@0: // * Redistributions in binary form must reproduce the above michael@0: // copyright notice, this list of conditions and the following disclaimer michael@0: // in the documentation and/or other materials provided with the michael@0: // distribution. michael@0: // * Neither the name of Google Inc. nor the names of its michael@0: // contributors may be used to endorse or promote products derived from michael@0: // this software without specific prior written permission. michael@0: // michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: michael@0: // Restructured in 2009 by: Jim Blandy michael@0: michael@0: // (derived from) michael@0: // dump_symbols.cc: implement google_breakpad::WriteSymbolFile: michael@0: // Find all the debugging info in a file and dump it as a Breakpad symbol file. michael@0: // michael@0: // dump_symbols.h: Read debugging information from an ELF file, and write michael@0: // it out as a Breakpad symbol file. michael@0: michael@0: // This file is derived from the following files in michael@0: // toolkit/crashreporter/google-breakpad: michael@0: // src/common/linux/dump_symbols.cc michael@0: // src/common/linux/elfutils.cc michael@0: // src/common/linux/file_id.cc michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "mozilla/Assertions.h" michael@0: michael@0: #include "LulPlatformMacros.h" michael@0: #include "LulCommonExt.h" michael@0: #include "LulDwarfExt.h" michael@0: #if defined(LUL_PLAT_arm_android) michael@0: # include "LulExidxExt.h" michael@0: #endif michael@0: #include "LulElfInt.h" michael@0: #include "LulMainInt.h" michael@0: michael@0: michael@0: #if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) michael@0: // bionic and older glibsc don't define it michael@0: # define SHT_ARM_EXIDX (SHT_LOPROC + 1) michael@0: #endif michael@0: michael@0: michael@0: // This namespace contains helper functions. michael@0: namespace { michael@0: michael@0: using lul::DwarfCFIToModule; michael@0: using lul::FindElfSectionByName; michael@0: using lul::GetOffset; michael@0: using lul::IsValidElf; michael@0: using lul::Module; michael@0: using lul::UniqueString; michael@0: using lul::scoped_ptr; michael@0: using lul::Summariser; michael@0: using std::string; michael@0: using std::vector; michael@0: using std::set; michael@0: michael@0: // michael@0: // FDWrapper michael@0: // michael@0: // Wrapper class to make sure opened file is closed. michael@0: // michael@0: class FDWrapper { michael@0: public: michael@0: explicit FDWrapper(int fd) : michael@0: fd_(fd) {} michael@0: ~FDWrapper() { michael@0: if (fd_ != -1) michael@0: close(fd_); michael@0: } michael@0: int get() { michael@0: return fd_; michael@0: } michael@0: int release() { michael@0: int fd = fd_; michael@0: fd_ = -1; michael@0: return fd; michael@0: } michael@0: private: michael@0: int fd_; michael@0: }; michael@0: michael@0: // michael@0: // MmapWrapper michael@0: // michael@0: // Wrapper class to make sure mapped regions are unmapped. michael@0: // michael@0: class MmapWrapper { michael@0: public: michael@0: MmapWrapper() : is_set_(false) {} michael@0: ~MmapWrapper() { michael@0: if (is_set_ && base_ != NULL) { michael@0: MOZ_ASSERT(size_ > 0); michael@0: munmap(base_, size_); michael@0: } michael@0: } michael@0: void set(void *mapped_address, size_t mapped_size) { michael@0: is_set_ = true; michael@0: base_ = mapped_address; michael@0: size_ = mapped_size; michael@0: } michael@0: void release() { michael@0: MOZ_ASSERT(is_set_); michael@0: is_set_ = false; michael@0: base_ = NULL; michael@0: size_ = 0; michael@0: } michael@0: michael@0: private: michael@0: bool is_set_; michael@0: void *base_; michael@0: size_t size_; michael@0: }; michael@0: michael@0: michael@0: // Set NUM_DW_REGNAMES to be the number of Dwarf register names michael@0: // appropriate to the machine architecture given in HEADER. Return michael@0: // true on success, or false if HEADER's machine architecture is not michael@0: // supported. michael@0: template michael@0: bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, michael@0: unsigned int* num_dw_regnames) { michael@0: switch (elf_header->e_machine) { michael@0: case EM_386: michael@0: *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); michael@0: return true; michael@0: case EM_ARM: michael@0: *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); michael@0: return true; michael@0: case EM_X86_64: michael@0: *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); michael@0: return true; michael@0: default: michael@0: MOZ_ASSERT(0); michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: template michael@0: bool LoadDwarfCFI(const string& dwarf_filename, michael@0: const typename ElfClass::Ehdr* elf_header, michael@0: const char* section_name, michael@0: const typename ElfClass::Shdr* section, michael@0: const bool eh_frame, michael@0: const typename ElfClass::Shdr* got_section, michael@0: const typename ElfClass::Shdr* text_section, michael@0: const bool big_endian, michael@0: SecMap* smap, michael@0: uintptr_t text_bias, michael@0: void (*log)(const char*)) { michael@0: // Find the appropriate set of register names for this file's michael@0: // architecture. michael@0: unsigned int num_dw_regs = 0; michael@0: if (!DwarfCFIRegisterNames(elf_header, &num_dw_regs)) { michael@0: fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" michael@0: " cannot convert DWARF call frame information\n", michael@0: dwarf_filename.c_str(), elf_header->e_machine); michael@0: return false; michael@0: } michael@0: michael@0: const lul::Endianness endianness michael@0: = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; michael@0: michael@0: // Find the call frame information and its size. michael@0: const char* cfi = michael@0: GetOffset(elf_header, section->sh_offset); michael@0: size_t cfi_size = section->sh_size; michael@0: michael@0: // Plug together the parser, handler, and their entourages. michael@0: michael@0: // Here's a summariser, which will receive the output of the michael@0: // parser, create summaries, and add them to |smap|. michael@0: Summariser* summ = new Summariser(smap, text_bias, log); michael@0: michael@0: DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); michael@0: DwarfCFIToModule handler(num_dw_regs, &module_reporter, summ); michael@0: lul::ByteReader byte_reader(endianness); michael@0: michael@0: byte_reader.SetAddressSize(ElfClass::kAddrSize); michael@0: michael@0: // Provide the base addresses for .eh_frame encoded pointers, if michael@0: // possible. michael@0: byte_reader.SetCFIDataBase(section->sh_addr, cfi); michael@0: if (got_section) michael@0: byte_reader.SetDataBase(got_section->sh_addr); michael@0: if (text_section) michael@0: byte_reader.SetTextBase(text_section->sh_addr); michael@0: michael@0: lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, michael@0: section_name); michael@0: lul::CallFrameInfo parser(cfi, cfi_size, michael@0: &byte_reader, &handler, &dwarf_reporter, michael@0: eh_frame); michael@0: parser.Start(); michael@0: michael@0: delete summ; michael@0: return true; michael@0: } michael@0: michael@0: #if defined(LUL_PLAT_arm_android) michael@0: template michael@0: bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, michael@0: const typename ElfClass::Shdr* exidx_section, michael@0: const typename ElfClass::Shdr* extab_section, michael@0: uint32_t loading_addr, michael@0: uintptr_t text_bias, michael@0: SecMap* smap, michael@0: void (*log)(const char*)) { michael@0: // To do this properly we need to know: michael@0: // * the bounds of the .ARM.exidx section in the mapped image michael@0: // * the bounds of the .ARM.extab section in the mapped image michael@0: // * the vma of the last byte in the text section associated with the .exidx michael@0: // The first two are easy. The third is a bit tricky. If we can't michael@0: // figure out what it is, just pass in zero. michael@0: // Note that we are reading EXIDX directly out of the mapped in michael@0: // executable image. Unlike with the CFI reader, there is no michael@0: // auxiliary, temporary mapping used to read the unwind data. michael@0: // michael@0: // An .exidx section is always required, but the .extab section michael@0: // can be optionally omitted, provided that .exidx does not refer michael@0: // to it. If the .exidx is erroneous and does refer to .extab even michael@0: // though .extab is missing, the range checks done by GET_EX_U32 in michael@0: // ExceptionTableInfo::ExtabEntryExtract should prevent any invalid michael@0: // memory accesses, and cause the .extab to be rejected as invalid. michael@0: const char *exidx_img michael@0: = GetOffset(elf_header, exidx_section->sh_offset); michael@0: size_t exidx_size = exidx_section->sh_size; michael@0: const char *extab_img michael@0: = extab_section michael@0: ? GetOffset(elf_header, extab_section->sh_offset) michael@0: : nullptr; michael@0: size_t extab_size = extab_section ? extab_section->sh_size : 0; michael@0: michael@0: // The sh_link field of the exidx section gives the section number michael@0: // for the associated text section. michael@0: uint32_t exidx_text_last_svma = 0; michael@0: int exidx_text_sno = exidx_section->sh_link; michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: // |sections| points to the section header table michael@0: const Shdr* sections michael@0: = GetOffset(elf_header, elf_header->e_shoff); michael@0: const int num_sections = elf_header->e_shnum; michael@0: if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { michael@0: const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; michael@0: if (exidx_text_shdr->sh_size > 0) { michael@0: exidx_text_last_svma michael@0: = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; michael@0: } michael@0: } michael@0: michael@0: lul::ARMExToModule handler(smap, log); michael@0: lul::ExceptionTableInfo michael@0: parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma, michael@0: &handler, michael@0: reinterpret_cast(elf_header), michael@0: loading_addr, text_bias, log); michael@0: parser.Start(); michael@0: return true; michael@0: } michael@0: #endif /* defined(LUL_PLAT_arm_android) */ michael@0: michael@0: bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, michael@0: void** elf_header) { michael@0: int obj_fd = open(obj_file.c_str(), O_RDONLY); michael@0: if (obj_fd < 0) { michael@0: fprintf(stderr, "Failed to open ELF file '%s': %s\n", michael@0: obj_file.c_str(), strerror(errno)); michael@0: return false; michael@0: } michael@0: FDWrapper obj_fd_wrapper(obj_fd); michael@0: struct stat st; michael@0: if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { michael@0: fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", michael@0: obj_file.c_str(), strerror(errno)); michael@0: return false; michael@0: } michael@0: // Mapping it read-only is good enough. In any case, mapping it michael@0: // read-write confuses Valgrind's debuginfo acquire/discard michael@0: // heuristics, making it hard to profile the profiler. michael@0: void *obj_base = mmap(nullptr, st.st_size, michael@0: PROT_READ, MAP_PRIVATE, obj_fd, 0); michael@0: if (obj_base == MAP_FAILED) { michael@0: fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", michael@0: obj_file.c_str(), strerror(errno)); michael@0: return false; michael@0: } michael@0: map_wrapper->set(obj_base, st.st_size); michael@0: *elf_header = obj_base; michael@0: if (!IsValidElf(*elf_header)) { michael@0: fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // Get the endianness of ELF_HEADER. If it's invalid, return false. michael@0: template michael@0: bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, michael@0: bool* big_endian) { michael@0: if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { michael@0: *big_endian = false; michael@0: return true; michael@0: } michael@0: if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { michael@0: *big_endian = true; michael@0: return true; michael@0: } michael@0: michael@0: fprintf(stderr, "bad data encoding in ELF header: %d\n", michael@0: elf_header->e_ident[EI_DATA]); michael@0: return false; michael@0: } michael@0: michael@0: // michael@0: // LoadSymbolsInfo michael@0: // michael@0: // Holds the state between the two calls to LoadSymbols() in case it's necessary michael@0: // to follow the .gnu_debuglink section and load debug information from a michael@0: // different file. michael@0: // michael@0: template michael@0: class LoadSymbolsInfo { michael@0: public: michael@0: typedef typename ElfClass::Addr Addr; michael@0: michael@0: explicit LoadSymbolsInfo(const vector& dbg_dirs) : michael@0: debug_dirs_(dbg_dirs), michael@0: has_loading_addr_(false) {} michael@0: michael@0: // Keeps track of which sections have been loaded so sections don't michael@0: // accidentally get loaded twice from two different files. michael@0: void LoadedSection(const string §ion) { michael@0: if (loaded_sections_.count(section) == 0) { michael@0: loaded_sections_.insert(section); michael@0: } else { michael@0: fprintf(stderr, "Section %s has already been loaded.\n", michael@0: section.c_str()); michael@0: } michael@0: } michael@0: michael@0: string debuglink_file() const { michael@0: return debuglink_file_; michael@0: } michael@0: michael@0: private: michael@0: const vector& debug_dirs_; // Directories in which to michael@0: // search for the debug ELF file. michael@0: michael@0: string debuglink_file_; // Full path to the debug ELF file. michael@0: michael@0: bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. michael@0: michael@0: set loaded_sections_; // Tracks the Loaded ELF sections michael@0: // between calls to LoadSymbols(). michael@0: }; michael@0: michael@0: // Find the preferred loading address of the binary. michael@0: template michael@0: typename ElfClass::Addr GetLoadingAddress( michael@0: const typename ElfClass::Phdr* program_headers, michael@0: int nheader) { michael@0: typedef typename ElfClass::Phdr Phdr; michael@0: michael@0: // For non-PIC executables (e_type == ET_EXEC), the load address is michael@0: // the start address of the first PT_LOAD segment. (ELF requires michael@0: // the segments to be sorted by load address.) For PIC executables michael@0: // and dynamic libraries (e_type == ET_DYN), this address will michael@0: // normally be zero. michael@0: for (int i = 0; i < nheader; ++i) { michael@0: const Phdr& header = program_headers[i]; michael@0: if (header.p_type == PT_LOAD) michael@0: return header.p_vaddr; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: template michael@0: bool LoadSymbols(const string& obj_file, michael@0: const bool big_endian, michael@0: const typename ElfClass::Ehdr* elf_header, michael@0: const bool read_gnu_debug_link, michael@0: LoadSymbolsInfo* info, michael@0: SecMap* smap, michael@0: void* rx_avma, michael@0: void (*log)(const char*)) { michael@0: typedef typename ElfClass::Phdr Phdr; michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: michael@0: char buf[500]; michael@0: snprintf(buf, sizeof(buf), "LoadSymbols: BEGIN %s\n", obj_file.c_str()); michael@0: buf[sizeof(buf)-1] = 0; michael@0: log(buf); michael@0: michael@0: // This is how the text bias is calculated. michael@0: // BEGIN CALCULATE BIAS michael@0: uintptr_t loading_addr = GetLoadingAddress( michael@0: GetOffset(elf_header, elf_header->e_phoff), michael@0: elf_header->e_phnum); michael@0: uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; michael@0: snprintf(buf, sizeof(buf), michael@0: "LoadSymbols: rx_avma=%llx, text_bias=%llx", michael@0: (unsigned long long int)(uintptr_t)rx_avma, michael@0: (unsigned long long int)text_bias); michael@0: buf[sizeof(buf)-1] = 0; michael@0: log(buf); michael@0: // END CALCULATE BIAS michael@0: michael@0: const Shdr* sections = michael@0: GetOffset(elf_header, elf_header->e_shoff); michael@0: const Shdr* section_names = sections + elf_header->e_shstrndx; michael@0: const char* names = michael@0: GetOffset(elf_header, section_names->sh_offset); michael@0: const char *names_end = names + section_names->sh_size; michael@0: bool found_usable_info = false; michael@0: michael@0: // Dwarf Call Frame Information (CFI) is actually independent from michael@0: // the other DWARF debugging information, and can be used alone. michael@0: const Shdr* dwarf_cfi_section = michael@0: FindElfSectionByName(".debug_frame", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (dwarf_cfi_section) { michael@0: // Ignore the return value of this function; even without call frame michael@0: // information, the other debugging information could be perfectly michael@0: // useful. michael@0: info->LoadedSection(".debug_frame"); michael@0: bool result = michael@0: LoadDwarfCFI(obj_file, elf_header, ".debug_frame", michael@0: dwarf_cfi_section, false, 0, 0, big_endian, michael@0: smap, text_bias, log); michael@0: found_usable_info = found_usable_info || result; michael@0: if (result) michael@0: log("LoadSymbols: read CFI from .debug_frame"); michael@0: } michael@0: michael@0: // Linux C++ exception handling information can also provide michael@0: // unwinding data. michael@0: const Shdr* eh_frame_section = michael@0: FindElfSectionByName(".eh_frame", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (eh_frame_section) { michael@0: // Pointers in .eh_frame data may be relative to the base addresses of michael@0: // certain sections. Provide those sections if present. michael@0: const Shdr* got_section = michael@0: FindElfSectionByName(".got", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: const Shdr* text_section = michael@0: FindElfSectionByName(".text", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: info->LoadedSection(".eh_frame"); michael@0: // As above, ignore the return value of this function. michael@0: bool result = michael@0: LoadDwarfCFI(obj_file, elf_header, ".eh_frame", michael@0: eh_frame_section, true, michael@0: got_section, text_section, big_endian, michael@0: smap, text_bias, log); michael@0: found_usable_info = found_usable_info || result; michael@0: if (result) michael@0: log("LoadSymbols: read CFI from .eh_frame"); michael@0: } michael@0: michael@0: # if defined(LUL_PLAT_arm_android) michael@0: // ARM has special unwind tables that can be used. .exidx is michael@0: // always required, and .extab is normally required, but may michael@0: // be omitted if it is empty. See comments on LoadARMexidx() michael@0: // for more details. michael@0: const Shdr* arm_exidx_section = michael@0: FindElfSectionByName(".ARM.exidx", SHT_ARM_EXIDX, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: const Shdr* arm_extab_section = michael@0: FindElfSectionByName(".ARM.extab", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: const Shdr* debug_info_section = michael@0: FindElfSectionByName(".debug_info", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: // Only load information from this section if there isn't a .debug_info michael@0: // section. michael@0: if (!debug_info_section && arm_exidx_section) { michael@0: info->LoadedSection(".ARM.exidx"); michael@0: if (arm_extab_section) michael@0: info->LoadedSection(".ARM.extab"); michael@0: bool result = LoadARMexidx(elf_header, michael@0: arm_exidx_section, arm_extab_section, michael@0: loading_addr, text_bias, smap, log); michael@0: found_usable_info = found_usable_info || result; michael@0: if (result) michael@0: log("LoadSymbols: read EXIDX from .ARM.{exidx,extab}"); michael@0: } michael@0: # endif /* defined(LUL_PLAT_arm_android) */ michael@0: michael@0: snprintf(buf, sizeof(buf), "LoadSymbols: END %s\n", obj_file.c_str()); michael@0: buf[sizeof(buf)-1] = 0; michael@0: log(buf); michael@0: michael@0: return found_usable_info; michael@0: } michael@0: michael@0: // Return the breakpad symbol file identifier for the architecture of michael@0: // ELF_HEADER. michael@0: template michael@0: const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { michael@0: typedef typename ElfClass::Half Half; michael@0: Half arch = elf_header->e_machine; michael@0: switch (arch) { michael@0: case EM_386: return "x86"; michael@0: case EM_ARM: return "arm"; michael@0: case EM_MIPS: return "mips"; michael@0: case EM_PPC64: return "ppc64"; michael@0: case EM_PPC: return "ppc"; michael@0: case EM_S390: return "s390"; michael@0: case EM_SPARC: return "sparc"; michael@0: case EM_SPARCV9: return "sparcv9"; michael@0: case EM_X86_64: return "x86_64"; michael@0: default: return NULL; michael@0: } michael@0: } michael@0: michael@0: // Format the Elf file identifier in IDENTIFIER as a UUID with the michael@0: // dashes removed. michael@0: string FormatIdentifier(unsigned char identifier[16]) { michael@0: char identifier_str[40]; michael@0: lul::FileID::ConvertIdentifierToString( michael@0: identifier, michael@0: identifier_str, michael@0: sizeof(identifier_str)); michael@0: string id_no_dash; michael@0: for (int i = 0; identifier_str[i] != '\0'; ++i) michael@0: if (identifier_str[i] != '-') michael@0: id_no_dash += identifier_str[i]; michael@0: // Add an extra "0" by the end. PDB files on Windows have an 'age' michael@0: // number appended to the end of the file identifier; this isn't michael@0: // really used or necessary on other platforms, but be consistent. michael@0: id_no_dash += '0'; michael@0: return id_no_dash; michael@0: } michael@0: michael@0: // Return the non-directory portion of FILENAME: the portion after the michael@0: // last slash, or the whole filename if there are no slashes. michael@0: string BaseFileName(const string &filename) { michael@0: // Lots of copies! basename's behavior is less than ideal. michael@0: char *c_filename = strdup(filename.c_str()); michael@0: string base = basename(c_filename); michael@0: free(c_filename); michael@0: return base; michael@0: } michael@0: michael@0: template michael@0: bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, michael@0: const string& obj_filename, michael@0: const vector& debug_dirs, michael@0: SecMap* smap, void* rx_avma, michael@0: void (*log)(const char*)) { michael@0: typedef typename ElfClass::Ehdr Ehdr; michael@0: michael@0: unsigned char identifier[16]; michael@0: if (!lul michael@0: ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { michael@0: fprintf(stderr, "%s: unable to generate file identifier\n", michael@0: obj_filename.c_str()); michael@0: return false; michael@0: } michael@0: michael@0: const char *architecture = ElfArchitecture(elf_header); michael@0: if (!architecture) { michael@0: fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", michael@0: obj_filename.c_str(), elf_header->e_machine); michael@0: return false; michael@0: } michael@0: michael@0: // Figure out what endianness this file is. michael@0: bool big_endian; michael@0: if (!ElfEndianness(elf_header, &big_endian)) michael@0: return false; michael@0: michael@0: string name = BaseFileName(obj_filename); michael@0: string os = "Linux"; michael@0: string id = FormatIdentifier(identifier); michael@0: michael@0: LoadSymbolsInfo info(debug_dirs); michael@0: if (!LoadSymbols(obj_filename, big_endian, elf_header, michael@0: !debug_dirs.empty(), &info, michael@0: smap, rx_avma, log)) { michael@0: const string debuglink_file = info.debuglink_file(); michael@0: if (debuglink_file.empty()) michael@0: return false; michael@0: michael@0: // Load debuglink ELF file. michael@0: fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); michael@0: MmapWrapper debug_map_wrapper; michael@0: Ehdr* debug_elf_header = NULL; michael@0: if (!LoadELF(debuglink_file, &debug_map_wrapper, michael@0: reinterpret_cast(&debug_elf_header))) michael@0: return false; michael@0: // Sanity checks to make sure everything matches up. michael@0: const char *debug_architecture = michael@0: ElfArchitecture(debug_elf_header); michael@0: if (!debug_architecture) { michael@0: fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", michael@0: debuglink_file.c_str(), debug_elf_header->e_machine); michael@0: return false; michael@0: } michael@0: if (strcmp(architecture, debug_architecture)) { michael@0: fprintf(stderr, "%s with ELF machine architecture %s does not match " michael@0: "%s with ELF architecture %s\n", michael@0: debuglink_file.c_str(), debug_architecture, michael@0: obj_filename.c_str(), architecture); michael@0: return false; michael@0: } michael@0: michael@0: bool debug_big_endian; michael@0: if (!ElfEndianness(debug_elf_header, &debug_big_endian)) michael@0: return false; michael@0: if (debug_big_endian != big_endian) { michael@0: fprintf(stderr, "%s and %s does not match in endianness\n", michael@0: obj_filename.c_str(), debuglink_file.c_str()); michael@0: return false; michael@0: } michael@0: michael@0: if (!LoadSymbols(debuglink_file, debug_big_endian, michael@0: debug_elf_header, false, &info, michael@0: smap, rx_avma, log)) { michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: } // namespace (anon) michael@0: michael@0: michael@0: namespace lul { michael@0: michael@0: bool ReadSymbolDataInternal(const uint8_t* obj_file, michael@0: const string& obj_filename, michael@0: const vector& debug_dirs, michael@0: SecMap* smap, void* rx_avma, michael@0: void (*log)(const char*)) { michael@0: michael@0: if (!IsValidElf(obj_file)) { michael@0: fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); michael@0: return false; michael@0: } michael@0: michael@0: int elfclass = ElfClass(obj_file); michael@0: if (elfclass == ELFCLASS32) { michael@0: return ReadSymbolDataElfClass( michael@0: reinterpret_cast(obj_file), michael@0: obj_filename, debug_dirs, smap, rx_avma, log); michael@0: } michael@0: if (elfclass == ELFCLASS64) { michael@0: return ReadSymbolDataElfClass( michael@0: reinterpret_cast(obj_file), michael@0: obj_filename, debug_dirs, smap, rx_avma, log); michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool ReadSymbolData(const string& obj_file, michael@0: const vector& debug_dirs, michael@0: SecMap* smap, void* rx_avma, michael@0: void (*log)(const char*)) { michael@0: MmapWrapper map_wrapper; michael@0: void* elf_header = NULL; michael@0: if (!LoadELF(obj_file, &map_wrapper, &elf_header)) michael@0: return false; michael@0: michael@0: return ReadSymbolDataInternal(reinterpret_cast(elf_header), michael@0: obj_file, debug_dirs, smap, rx_avma, log); michael@0: } michael@0: michael@0: michael@0: namespace { michael@0: michael@0: template michael@0: void FindElfClassSection(const char *elf_base, michael@0: const char *section_name, michael@0: typename ElfClass::Word section_type, michael@0: const void **section_start, michael@0: int *section_size) { michael@0: typedef typename ElfClass::Ehdr Ehdr; michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: michael@0: MOZ_ASSERT(elf_base); michael@0: MOZ_ASSERT(section_start); michael@0: MOZ_ASSERT(section_size); michael@0: michael@0: MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); michael@0: michael@0: const Ehdr* elf_header = reinterpret_cast(elf_base); michael@0: MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); michael@0: michael@0: const Shdr* sections = michael@0: GetOffset(elf_header, elf_header->e_shoff); michael@0: const Shdr* section_names = sections + elf_header->e_shstrndx; michael@0: const char* names = michael@0: GetOffset(elf_header, section_names->sh_offset); michael@0: const char *names_end = names + section_names->sh_size; michael@0: michael@0: const Shdr* section = michael@0: FindElfSectionByName(section_name, section_type, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: michael@0: if (section != NULL && section->sh_size > 0) { michael@0: *section_start = elf_base + section->sh_offset; michael@0: *section_size = section->sh_size; michael@0: } michael@0: } michael@0: michael@0: template michael@0: void FindElfClassSegment(const char *elf_base, michael@0: typename ElfClass::Word segment_type, michael@0: const void **segment_start, michael@0: int *segment_size) { michael@0: typedef typename ElfClass::Ehdr Ehdr; michael@0: typedef typename ElfClass::Phdr Phdr; michael@0: michael@0: MOZ_ASSERT(elf_base); michael@0: MOZ_ASSERT(segment_start); michael@0: MOZ_ASSERT(segment_size); michael@0: michael@0: MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); michael@0: michael@0: const Ehdr* elf_header = reinterpret_cast(elf_base); michael@0: MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); michael@0: michael@0: const Phdr* phdrs = michael@0: GetOffset(elf_header, elf_header->e_phoff); michael@0: michael@0: for (int i = 0; i < elf_header->e_phnum; ++i) { michael@0: if (phdrs[i].p_type == segment_type) { michael@0: *segment_start = elf_base + phdrs[i].p_offset; michael@0: *segment_size = phdrs[i].p_filesz; michael@0: return; michael@0: } michael@0: } michael@0: } michael@0: michael@0: } // namespace (anon) michael@0: michael@0: bool IsValidElf(const void* elf_base) { michael@0: return strncmp(reinterpret_cast(elf_base), michael@0: ELFMAG, SELFMAG) == 0; michael@0: } michael@0: michael@0: int ElfClass(const void* elf_base) { michael@0: const ElfW(Ehdr)* elf_header = michael@0: reinterpret_cast(elf_base); michael@0: michael@0: return elf_header->e_ident[EI_CLASS]; michael@0: } michael@0: michael@0: bool FindElfSection(const void *elf_mapped_base, michael@0: const char *section_name, michael@0: uint32_t section_type, michael@0: const void **section_start, michael@0: int *section_size, michael@0: int *elfclass) { michael@0: MOZ_ASSERT(elf_mapped_base); michael@0: MOZ_ASSERT(section_start); michael@0: MOZ_ASSERT(section_size); michael@0: michael@0: *section_start = NULL; michael@0: *section_size = 0; michael@0: michael@0: if (!IsValidElf(elf_mapped_base)) michael@0: return false; michael@0: michael@0: int cls = ElfClass(elf_mapped_base); michael@0: if (elfclass) { michael@0: *elfclass = cls; michael@0: } michael@0: michael@0: const char* elf_base = michael@0: static_cast(elf_mapped_base); michael@0: michael@0: if (cls == ELFCLASS32) { michael@0: FindElfClassSection(elf_base, section_name, section_type, michael@0: section_start, section_size); michael@0: return *section_start != NULL; michael@0: } else if (cls == ELFCLASS64) { michael@0: FindElfClassSection(elf_base, section_name, section_type, michael@0: section_start, section_size); michael@0: return *section_start != NULL; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool FindElfSegment(const void *elf_mapped_base, michael@0: uint32_t segment_type, michael@0: const void **segment_start, michael@0: int *segment_size, michael@0: int *elfclass) { michael@0: MOZ_ASSERT(elf_mapped_base); michael@0: MOZ_ASSERT(segment_start); michael@0: MOZ_ASSERT(segment_size); michael@0: michael@0: *segment_start = NULL; michael@0: *segment_size = 0; michael@0: michael@0: if (!IsValidElf(elf_mapped_base)) michael@0: return false; michael@0: michael@0: int cls = ElfClass(elf_mapped_base); michael@0: if (elfclass) { michael@0: *elfclass = cls; michael@0: } michael@0: michael@0: const char* elf_base = michael@0: static_cast(elf_mapped_base); michael@0: michael@0: if (cls == ELFCLASS32) { michael@0: FindElfClassSegment(elf_base, segment_type, michael@0: segment_start, segment_size); michael@0: return *segment_start != NULL; michael@0: } else if (cls == ELFCLASS64) { michael@0: FindElfClassSegment(elf_base, segment_type, michael@0: segment_start, segment_size); michael@0: return *segment_start != NULL; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: michael@0: // (derived from) michael@0: // file_id.cc: Return a unique identifier for a file michael@0: // michael@0: // See file_id.h for documentation michael@0: // michael@0: michael@0: // ELF note name and desc are 32-bits word padded. michael@0: #define NOTE_PADDING(a) ((a + 3) & ~3) michael@0: michael@0: // These functions are also used inside the crashed process, so be safe michael@0: // and use the syscall/libc wrappers instead of direct syscalls or libc. michael@0: michael@0: template michael@0: static bool ElfClassBuildIDNoteIdentifier(const void *section, int length, michael@0: uint8_t identifier[kMDGUIDSize]) { michael@0: typedef typename ElfClass::Nhdr Nhdr; michael@0: michael@0: const void* section_end = reinterpret_cast(section) + length; michael@0: const Nhdr* note_header = reinterpret_cast(section); michael@0: while (reinterpret_cast(note_header) < section_end) { michael@0: if (note_header->n_type == NT_GNU_BUILD_ID) michael@0: break; michael@0: note_header = reinterpret_cast( michael@0: reinterpret_cast(note_header) + sizeof(Nhdr) + michael@0: NOTE_PADDING(note_header->n_namesz) + michael@0: NOTE_PADDING(note_header->n_descsz)); michael@0: } michael@0: if (reinterpret_cast(note_header) >= section_end || michael@0: note_header->n_descsz == 0) { michael@0: return false; michael@0: } michael@0: michael@0: const char* build_id = reinterpret_cast(note_header) + michael@0: sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); michael@0: // Copy as many bits of the build ID as will fit michael@0: // into the GUID space. michael@0: memset(identifier, 0, kMDGUIDSize); michael@0: memcpy(identifier, build_id, michael@0: std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // Attempt to locate a .note.gnu.build-id section in an ELF binary michael@0: // and copy as many bytes of it as will fit into |identifier|. michael@0: static bool FindElfBuildIDNote(const void *elf_mapped_base, michael@0: uint8_t identifier[kMDGUIDSize]) { michael@0: void* note_section; michael@0: int note_size, elfclass; michael@0: if ((!FindElfSegment(elf_mapped_base, PT_NOTE, michael@0: (const void**)¬e_section, ¬e_size, &elfclass) || michael@0: note_size == 0) && michael@0: (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, michael@0: (const void**)¬e_section, ¬e_size, &elfclass) || michael@0: note_size == 0)) { michael@0: return false; michael@0: } michael@0: michael@0: if (elfclass == ELFCLASS32) { michael@0: return ElfClassBuildIDNoteIdentifier(note_section, note_size, michael@0: identifier); michael@0: } else if (elfclass == ELFCLASS64) { michael@0: return ElfClassBuildIDNoteIdentifier(note_section, note_size, michael@0: identifier); michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: // Attempt to locate the .text section of an ELF binary and generate michael@0: // a simple hash by XORing the first page worth of bytes into |identifier|. michael@0: static bool HashElfTextSection(const void *elf_mapped_base, michael@0: uint8_t identifier[kMDGUIDSize]) { michael@0: void* text_section; michael@0: int text_size; michael@0: if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, michael@0: (const void**)&text_section, &text_size, NULL) || michael@0: text_size == 0) { michael@0: return false; michael@0: } michael@0: michael@0: memset(identifier, 0, kMDGUIDSize); michael@0: const uint8_t* ptr = reinterpret_cast(text_section); michael@0: const uint8_t* ptr_end = ptr + std::min(text_size, 4096); michael@0: while (ptr < ptr_end) { michael@0: for (unsigned i = 0; i < kMDGUIDSize; i++) michael@0: identifier[i] ^= ptr[i]; michael@0: ptr += kMDGUIDSize; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // static michael@0: bool FileID::ElfFileIdentifierFromMappedFile(const void* base, michael@0: uint8_t identifier[kMDGUIDSize]) { michael@0: // Look for a build id note first. michael@0: if (FindElfBuildIDNote(base, identifier)) michael@0: return true; michael@0: michael@0: // Fall back on hashing the first page of the text section. michael@0: return HashElfTextSection(base, identifier); michael@0: } michael@0: michael@0: // static michael@0: void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], michael@0: char* buffer, int buffer_length) { michael@0: uint8_t identifier_swapped[kMDGUIDSize]; michael@0: michael@0: // Endian-ness swap to match dump processor expectation. michael@0: memcpy(identifier_swapped, identifier, kMDGUIDSize); michael@0: uint32_t* data1 = reinterpret_cast(identifier_swapped); michael@0: *data1 = htonl(*data1); michael@0: uint16_t* data2 = reinterpret_cast(identifier_swapped + 4); michael@0: *data2 = htons(*data2); michael@0: uint16_t* data3 = reinterpret_cast(identifier_swapped + 6); michael@0: *data3 = htons(*data3); michael@0: michael@0: int buffer_idx = 0; michael@0: for (unsigned int idx = 0; michael@0: (buffer_idx < buffer_length) && (idx < kMDGUIDSize); michael@0: ++idx) { michael@0: int hi = (identifier_swapped[idx] >> 4) & 0x0F; michael@0: int lo = (identifier_swapped[idx]) & 0x0F; michael@0: michael@0: if (idx == 4 || idx == 6 || idx == 8 || idx == 10) michael@0: buffer[buffer_idx++] = '-'; michael@0: michael@0: buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; michael@0: buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; michael@0: } michael@0: michael@0: // NULL terminate michael@0: buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; michael@0: } michael@0: michael@0: } // namespace lul