michael@0: // Copyright (c) 2011 Google Inc. michael@0: // All rights reserved. michael@0: // michael@0: // Redistribution and use in source and binary forms, with or without michael@0: // modification, are permitted provided that the following conditions are michael@0: // met: michael@0: // michael@0: // * Redistributions of source code must retain the above copyright michael@0: // notice, this list of conditions and the following disclaimer. michael@0: // * Redistributions in binary form must reproduce the above michael@0: // copyright notice, this list of conditions and the following disclaimer michael@0: // in the documentation and/or other materials provided with the michael@0: // distribution. michael@0: // * Neither the name of Google Inc. nor the names of its michael@0: // contributors may be used to endorse or promote products derived from michael@0: // this software without specific prior written permission. michael@0: // michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: michael@0: // Restructured in 2009 by: Jim Blandy michael@0: michael@0: // dump_symbols.cc: implement google_breakpad::WriteSymbolFile: michael@0: // Find all the debugging info in a file and dump it as a Breakpad symbol file. michael@0: michael@0: #include "common/linux/dump_symbols.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "common/arm_ex_reader.h" michael@0: #include "common/dwarf/bytereader-inl.h" michael@0: #include "common/dwarf/dwarf2diehandler.h" michael@0: #include "common/dwarf_cfi_to_module.h" michael@0: #include "common/dwarf_cu_to_module.h" michael@0: #include "common/dwarf_line_to_module.h" michael@0: #include "common/linux/elfutils.h" michael@0: #include "common/linux/elfutils-inl.h" michael@0: #include "common/linux/elf_symbols_to_module.h" michael@0: #include "common/linux/file_id.h" michael@0: #include "common/module.h" michael@0: #include "common/scoped_ptr.h" michael@0: #ifndef NO_STABS_SUPPORT michael@0: #include "common/stabs_reader.h" michael@0: #include "common/stabs_to_module.h" michael@0: #endif michael@0: #include "common/using_std_string.h" michael@0: #include "common/logging.h" michael@0: michael@0: #ifndef SHT_ARM_EXIDX michael@0: // bionic and older glibc don't define it michael@0: # define SHT_ARM_EXIDX (SHT_LOPROC + 1) michael@0: #endif michael@0: michael@0: // This namespace contains helper functions. michael@0: namespace { michael@0: michael@0: using google_breakpad::DwarfCFIToModule; michael@0: using google_breakpad::DwarfCUToModule; michael@0: using google_breakpad::DwarfLineToModule; michael@0: using google_breakpad::ElfClass; michael@0: using google_breakpad::ElfClass32; michael@0: using google_breakpad::ElfClass64; michael@0: using google_breakpad::FindElfSectionByName; michael@0: using google_breakpad::GetOffset; michael@0: using google_breakpad::IsValidElf; michael@0: using google_breakpad::Module; michael@0: #ifndef NO_STABS_SUPPORT michael@0: using google_breakpad::StabsToModule; michael@0: #endif michael@0: using google_breakpad::UniqueString; michael@0: using google_breakpad::scoped_ptr; michael@0: michael@0: // michael@0: // FDWrapper michael@0: // michael@0: // Wrapper class to make sure opened file is closed. michael@0: // michael@0: class FDWrapper { michael@0: public: michael@0: explicit FDWrapper(int fd) : michael@0: fd_(fd) {} michael@0: ~FDWrapper() { michael@0: if (fd_ != -1) michael@0: close(fd_); michael@0: } michael@0: int get() { michael@0: return fd_; michael@0: } michael@0: int release() { michael@0: int fd = fd_; michael@0: fd_ = -1; michael@0: return fd; michael@0: } michael@0: private: michael@0: int fd_; michael@0: }; michael@0: michael@0: // michael@0: // MmapWrapper michael@0: // michael@0: // Wrapper class to make sure mapped regions are unmapped. michael@0: // michael@0: class MmapWrapper { michael@0: public: michael@0: MmapWrapper() : is_set_(false) {} michael@0: ~MmapWrapper() { michael@0: if (is_set_ && base_ != NULL) { michael@0: assert(size_ > 0); michael@0: munmap(base_, size_); michael@0: } michael@0: } michael@0: void set(void *mapped_address, size_t mapped_size) { michael@0: is_set_ = true; michael@0: base_ = mapped_address; michael@0: size_ = mapped_size; michael@0: } michael@0: void release() { michael@0: assert(is_set_); michael@0: is_set_ = false; michael@0: base_ = NULL; michael@0: size_ = 0; michael@0: } michael@0: michael@0: private: michael@0: bool is_set_; michael@0: void *base_; michael@0: size_t size_; michael@0: }; michael@0: michael@0: // Find the preferred loading address of the binary. michael@0: template michael@0: typename ElfClass::Addr GetLoadingAddress( michael@0: const typename ElfClass::Phdr* program_headers, michael@0: int nheader) { michael@0: typedef typename ElfClass::Phdr Phdr; michael@0: michael@0: for (int i = 0; i < nheader; ++i) { michael@0: const Phdr& header = program_headers[i]; michael@0: // For executable, it is the PT_LOAD segment with offset to zero. michael@0: if (header.p_type == PT_LOAD && michael@0: header.p_offset == 0) michael@0: return header.p_vaddr; michael@0: } michael@0: // For other types of ELF, return 0. michael@0: return 0; michael@0: } michael@0: michael@0: #ifndef NO_STABS_SUPPORT michael@0: template michael@0: bool LoadStabs(const typename ElfClass::Ehdr* elf_header, michael@0: const typename ElfClass::Shdr* stab_section, michael@0: const typename ElfClass::Shdr* stabstr_section, michael@0: const bool big_endian, michael@0: Module* module) { michael@0: // A callback object to handle data from the STABS reader. michael@0: StabsToModule handler(module); michael@0: // Find the addresses of the STABS data, and create a STABS reader object. michael@0: // On Linux, STABS entries always have 32-bit values, regardless of the michael@0: // address size of the architecture whose code they're describing, and michael@0: // the strings are always "unitized". michael@0: const uint8_t* stabs = michael@0: GetOffset(elf_header, stab_section->sh_offset); michael@0: const uint8_t* stabstr = michael@0: GetOffset(elf_header, stabstr_section->sh_offset); michael@0: google_breakpad::StabsReader reader(stabs, stab_section->sh_size, michael@0: stabstr, stabstr_section->sh_size, michael@0: big_endian, 4, true, &handler); michael@0: // Read the STABS data, and do post-processing. michael@0: if (!reader.Process()) michael@0: return false; michael@0: handler.Finalize(); michael@0: return true; michael@0: } michael@0: #endif // NO_STABS_SUPPORT michael@0: michael@0: // A line-to-module loader that accepts line number info parsed by michael@0: // dwarf2reader::LineInfo and populates a Module and a line vector michael@0: // with the results. michael@0: class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler { michael@0: public: michael@0: // Create a line-to-module converter using BYTE_READER. michael@0: explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader) michael@0: : byte_reader_(byte_reader) { } michael@0: void StartCompilationUnit(const string& compilation_dir) { michael@0: compilation_dir_ = compilation_dir; michael@0: } michael@0: void ReadProgram(const char *program, uint64 length, michael@0: Module *module, std::vector *lines) { michael@0: DwarfLineToModule handler(module, compilation_dir_, lines); michael@0: dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); michael@0: parser.Start(); michael@0: } michael@0: private: michael@0: string compilation_dir_; michael@0: dwarf2reader::ByteReader *byte_reader_; michael@0: }; michael@0: michael@0: template michael@0: bool LoadDwarf(const string& dwarf_filename, michael@0: const typename ElfClass::Ehdr* elf_header, michael@0: const bool big_endian, michael@0: Module* module) { michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: michael@0: const dwarf2reader::Endianness endianness = big_endian ? michael@0: dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; michael@0: dwarf2reader::ByteReader byte_reader(endianness); michael@0: michael@0: // Construct a context for this file. michael@0: DwarfCUToModule::FileContext file_context(dwarf_filename, module); michael@0: michael@0: // Build a map of the ELF file's sections. michael@0: const Shdr* sections = michael@0: GetOffset(elf_header, elf_header->e_shoff); michael@0: int num_sections = elf_header->e_shnum; michael@0: const Shdr* section_names = sections + elf_header->e_shstrndx; michael@0: for (int i = 0; i < num_sections; i++) { michael@0: const Shdr* section = §ions[i]; michael@0: string name = GetOffset(elf_header, michael@0: section_names->sh_offset) + michael@0: section->sh_name; michael@0: const char* contents = GetOffset(elf_header, michael@0: section->sh_offset); michael@0: uint64 length = section->sh_size; michael@0: file_context.section_map[name] = std::make_pair(contents, length); michael@0: } michael@0: michael@0: // Parse all the compilation units in the .debug_info section. michael@0: DumperLineToModule line_to_module(&byte_reader); michael@0: std::pair debug_info_section michael@0: = file_context.section_map[".debug_info"]; michael@0: // This should never have been called if the file doesn't have a michael@0: // .debug_info section. michael@0: assert(debug_info_section.first); michael@0: uint64 debug_info_length = debug_info_section.second; michael@0: for (uint64 offset = 0; offset < debug_info_length;) { michael@0: // Make a handler for the root DIE that populates MODULE with the michael@0: // data that was found. michael@0: DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset); michael@0: DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter); michael@0: // Make a Dwarf2Handler that drives the DIEHandler. michael@0: dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); michael@0: // Make a DWARF parser for the compilation unit at OFFSET. michael@0: dwarf2reader::CompilationUnit reader(file_context.section_map, michael@0: offset, michael@0: &byte_reader, michael@0: &die_dispatcher); michael@0: // Process the entire compilation unit; get the offset of the next. michael@0: offset += reader.Start(); michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // Fill REGISTER_NAMES with the register names appropriate to the michael@0: // machine architecture given in HEADER, indexed by the register michael@0: // numbers used in DWARF call frame information. Return true on michael@0: // success, or false if HEADER's machine architecture is not michael@0: // supported. michael@0: template michael@0: bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, michael@0: std::vector* register_names) { michael@0: switch (elf_header->e_machine) { michael@0: case EM_386: michael@0: *register_names = DwarfCFIToModule::RegisterNames::I386(); michael@0: return true; michael@0: case EM_ARM: michael@0: *register_names = DwarfCFIToModule::RegisterNames::ARM(); michael@0: return true; michael@0: case EM_X86_64: michael@0: *register_names = DwarfCFIToModule::RegisterNames::X86_64(); michael@0: return true; michael@0: default: michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: template michael@0: bool LoadDwarfCFI(const string& dwarf_filename, michael@0: const typename ElfClass::Ehdr* elf_header, michael@0: const char* section_name, michael@0: const typename ElfClass::Shdr* section, michael@0: const bool eh_frame, michael@0: const typename ElfClass::Shdr* got_section, michael@0: const typename ElfClass::Shdr* text_section, michael@0: const bool big_endian, michael@0: Module* module) { michael@0: // Find the appropriate set of register names for this file's michael@0: // architecture. michael@0: std::vector register_names; michael@0: if (!DwarfCFIRegisterNames(elf_header, ®ister_names)) { michael@0: fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" michael@0: " cannot convert DWARF call frame information\n", michael@0: dwarf_filename.c_str(), elf_header->e_machine); michael@0: return false; michael@0: } michael@0: michael@0: const dwarf2reader::Endianness endianness = big_endian ? michael@0: dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; michael@0: michael@0: // Find the call frame information and its size. michael@0: const char* cfi = michael@0: GetOffset(elf_header, section->sh_offset); michael@0: size_t cfi_size = section->sh_size; michael@0: michael@0: // Plug together the parser, handler, and their entourages. michael@0: DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name); michael@0: DwarfCFIToModule handler(module, register_names, &module_reporter); michael@0: dwarf2reader::ByteReader byte_reader(endianness); michael@0: michael@0: byte_reader.SetAddressSize(ElfClass::kAddrSize); michael@0: michael@0: // Provide the base addresses for .eh_frame encoded pointers, if michael@0: // possible. michael@0: byte_reader.SetCFIDataBase(section->sh_addr, cfi); michael@0: if (got_section) michael@0: byte_reader.SetDataBase(got_section->sh_addr); michael@0: if (text_section) michael@0: byte_reader.SetTextBase(text_section->sh_addr); michael@0: michael@0: dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename, michael@0: section_name); michael@0: dwarf2reader::CallFrameInfo parser(cfi, cfi_size, michael@0: &byte_reader, &handler, &dwarf_reporter, michael@0: eh_frame); michael@0: parser.Start(); michael@0: return true; michael@0: } michael@0: michael@0: template michael@0: bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, michael@0: const typename ElfClass::Shdr* exidx_section, michael@0: const typename ElfClass::Shdr* extab_section, michael@0: uint32_t loading_addr, michael@0: Module* module) { michael@0: // To do this properly we need to know: michael@0: // * the bounds of the .ARM.exidx section in the mapped image michael@0: // * the bounds of the .ARM.extab section in the mapped image michael@0: // * the vma of the last byte in the text section associated with the .exidx michael@0: // The first two are easy. The third is a bit tricky. If we can't michael@0: // figure out what it is, just pass in zero. michael@0: const char *exidx_img michael@0: = GetOffset(elf_header, exidx_section->sh_offset); michael@0: size_t exidx_size = exidx_section->sh_size; michael@0: const char *extab_img michael@0: = GetOffset(elf_header, extab_section->sh_offset); michael@0: size_t extab_size = extab_section->sh_size; michael@0: michael@0: // The sh_link field of the exidx section gives the section number michael@0: // for the associated text section. michael@0: uint32_t exidx_text_last_svma = 0; michael@0: int exidx_text_sno = exidx_section->sh_link; michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: // |sections| points to the section header table michael@0: const Shdr* sections michael@0: = GetOffset(elf_header, elf_header->e_shoff); michael@0: const int num_sections = elf_header->e_shnum; michael@0: if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { michael@0: const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; michael@0: if (exidx_text_shdr->sh_size > 0) { michael@0: exidx_text_last_svma michael@0: = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; michael@0: } michael@0: } michael@0: michael@0: arm_ex_to_module::ARMExToModule handler(module); michael@0: arm_ex_reader::ExceptionTableInfo michael@0: parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma, michael@0: &handler, michael@0: reinterpret_cast(elf_header), michael@0: loading_addr); michael@0: parser.Start(); michael@0: return true; michael@0: } michael@0: michael@0: bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, michael@0: void** elf_header) { michael@0: int obj_fd = open(obj_file.c_str(), O_RDONLY); michael@0: if (obj_fd < 0) { michael@0: fprintf(stderr, "Failed to open ELF file '%s': %s\n", michael@0: obj_file.c_str(), strerror(errno)); michael@0: return false; michael@0: } michael@0: FDWrapper obj_fd_wrapper(obj_fd); michael@0: struct stat st; michael@0: if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { michael@0: fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", michael@0: obj_file.c_str(), strerror(errno)); michael@0: return false; michael@0: } michael@0: void *obj_base = mmap(NULL, st.st_size, michael@0: PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0); michael@0: if (obj_base == MAP_FAILED) { michael@0: fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", michael@0: obj_file.c_str(), strerror(errno)); michael@0: return false; michael@0: } michael@0: map_wrapper->set(obj_base, st.st_size); michael@0: *elf_header = obj_base; michael@0: if (!IsValidElf(*elf_header)) { michael@0: fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // Get the endianness of ELF_HEADER. If it's invalid, return false. michael@0: template michael@0: bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, michael@0: bool* big_endian) { michael@0: if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { michael@0: *big_endian = false; michael@0: return true; michael@0: } michael@0: if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { michael@0: *big_endian = true; michael@0: return true; michael@0: } michael@0: michael@0: fprintf(stderr, "bad data encoding in ELF header: %d\n", michael@0: elf_header->e_ident[EI_DATA]); michael@0: return false; michael@0: } michael@0: michael@0: // Read the .gnu_debuglink and get the debug file name. If anything goes michael@0: // wrong, return an empty string. michael@0: template michael@0: string ReadDebugLink(const char* debuglink, michael@0: size_t debuglink_size, michael@0: const string& obj_file, michael@0: const std::vector& debug_dirs) { michael@0: size_t debuglink_len = strlen(debuglink) + 5; // '\0' + CRC32. michael@0: debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round to nearest 4 bytes. michael@0: michael@0: // Sanity check. michael@0: if (debuglink_len != debuglink_size) { michael@0: fprintf(stderr, "Mismatched .gnu_debuglink string / section size: " michael@0: "%zx %zx\n", debuglink_len, debuglink_size); michael@0: return ""; michael@0: } michael@0: michael@0: bool found = false; michael@0: int debuglink_fd = -1; michael@0: string debuglink_path; michael@0: std::vector::const_iterator it; michael@0: for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) { michael@0: const string& debug_dir = *it; michael@0: debuglink_path = debug_dir + "/" + debuglink; michael@0: debuglink_fd = open(debuglink_path.c_str(), O_RDONLY); michael@0: if (debuglink_fd >= 0) { michael@0: found = true; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (!found) { michael@0: fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n", michael@0: obj_file.c_str()); michael@0: for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) { michael@0: const string debug_dir = *it; michael@0: fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink); michael@0: } michael@0: return ""; michael@0: } michael@0: michael@0: FDWrapper debuglink_fd_wrapper(debuglink_fd); michael@0: // TODO(thestig) check the CRC-32 at the end of the .gnu_debuglink michael@0: // section. michael@0: michael@0: return debuglink_path; michael@0: } michael@0: michael@0: // michael@0: // LoadSymbolsInfo michael@0: // michael@0: // Holds the state between the two calls to LoadSymbols() in case it's necessary michael@0: // to follow the .gnu_debuglink section and load debug information from a michael@0: // different file. michael@0: // michael@0: template michael@0: class LoadSymbolsInfo { michael@0: public: michael@0: typedef typename ElfClass::Addr Addr; michael@0: michael@0: explicit LoadSymbolsInfo(const std::vector& dbg_dirs) : michael@0: debug_dirs_(dbg_dirs), michael@0: has_loading_addr_(false) {} michael@0: michael@0: // Keeps track of which sections have been loaded so sections don't michael@0: // accidentally get loaded twice from two different files. michael@0: void LoadedSection(const string §ion) { michael@0: if (loaded_sections_.count(section) == 0) { michael@0: loaded_sections_.insert(section); michael@0: } else { michael@0: fprintf(stderr, "Section %s has already been loaded.\n", michael@0: section.c_str()); michael@0: } michael@0: } michael@0: michael@0: // The ELF file and linked debug file are expected to have the same preferred michael@0: // loading address. michael@0: void set_loading_addr(Addr addr, const string &filename) { michael@0: if (!has_loading_addr_) { michael@0: loading_addr_ = addr; michael@0: loaded_file_ = filename; michael@0: return; michael@0: } michael@0: michael@0: if (addr != loading_addr_) { michael@0: fprintf(stderr, michael@0: "ELF file '%s' and debug ELF file '%s' " michael@0: "have different load addresses.\n", michael@0: loaded_file_.c_str(), filename.c_str()); michael@0: assert(false); michael@0: } michael@0: } michael@0: michael@0: // Setters and getters michael@0: const std::vector& debug_dirs() const { michael@0: return debug_dirs_; michael@0: } michael@0: michael@0: string debuglink_file() const { michael@0: return debuglink_file_; michael@0: } michael@0: void set_debuglink_file(string file) { michael@0: debuglink_file_ = file; michael@0: } michael@0: michael@0: private: michael@0: const std::vector& debug_dirs_; // Directories in which to michael@0: // search for the debug ELF file. michael@0: michael@0: string debuglink_file_; // Full path to the debug ELF file. michael@0: michael@0: bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. michael@0: michael@0: Addr loading_addr_; // Saves the preferred loading address from the michael@0: // first call to LoadSymbols(). michael@0: michael@0: string loaded_file_; // Name of the file loaded from the first call to michael@0: // LoadSymbols(). michael@0: michael@0: std::set loaded_sections_; // Tracks the Loaded ELF sections michael@0: // between calls to LoadSymbols(). michael@0: }; michael@0: michael@0: template michael@0: bool LoadSymbols(const string& obj_file, michael@0: const bool big_endian, michael@0: const typename ElfClass::Ehdr* elf_header, michael@0: const bool read_gnu_debug_link, michael@0: LoadSymbolsInfo* info, michael@0: SymbolData symbol_data, michael@0: Module* module) { michael@0: typedef typename ElfClass::Addr Addr; michael@0: typedef typename ElfClass::Phdr Phdr; michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: michael@0: BPLOG(INFO) << ""; michael@0: BPLOG(INFO) << "LoadSymbols: BEGIN " << obj_file; michael@0: michael@0: Addr loading_addr = GetLoadingAddress( michael@0: GetOffset(elf_header, elf_header->e_phoff), michael@0: elf_header->e_phnum); michael@0: module->SetLoadAddress(loading_addr); michael@0: info->set_loading_addr(loading_addr, obj_file); michael@0: michael@0: const Shdr* sections = michael@0: GetOffset(elf_header, elf_header->e_shoff); michael@0: const Shdr* section_names = sections + elf_header->e_shstrndx; michael@0: const char* names = michael@0: GetOffset(elf_header, section_names->sh_offset); michael@0: const char *names_end = names + section_names->sh_size; michael@0: bool found_debug_info_section = false; michael@0: bool found_usable_info = false; michael@0: michael@0: if (symbol_data != ONLY_CFI) { michael@0: #ifndef NO_STABS_SUPPORT michael@0: // Look for STABS debugging information, and load it if present. michael@0: const Shdr* stab_section = michael@0: FindElfSectionByName(".stab", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (stab_section) { michael@0: const Shdr* stabstr_section = stab_section->sh_link + sections; michael@0: if (stabstr_section) { michael@0: found_debug_info_section = true; michael@0: found_usable_info = true; michael@0: info->LoadedSection(".stab"); michael@0: if (!LoadStabs(elf_header, stab_section, stabstr_section, michael@0: big_endian, module)) { michael@0: fprintf(stderr, "%s: \".stab\" section found, but failed to load" michael@0: " STABS debugging information\n", obj_file.c_str()); michael@0: } michael@0: } michael@0: } michael@0: #endif // NO_STABS_SUPPORT michael@0: michael@0: // Look for DWARF debugging information, and load it if present. michael@0: const Shdr* dwarf_section = michael@0: FindElfSectionByName(".debug_info", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (dwarf_section) { michael@0: found_debug_info_section = true; michael@0: found_usable_info = true; michael@0: info->LoadedSection(".debug_info"); michael@0: if (!LoadDwarf(obj_file, elf_header, big_endian, module)) michael@0: fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " michael@0: "DWARF debugging information\n", obj_file.c_str()); michael@0: } michael@0: } michael@0: michael@0: if (symbol_data != NO_CFI) { michael@0: // Dwarf Call Frame Information (CFI) is actually independent from michael@0: // the other DWARF debugging information, and can be used alone. michael@0: const Shdr* dwarf_cfi_section = michael@0: FindElfSectionByName(".debug_frame", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (dwarf_cfi_section) { michael@0: // Ignore the return value of this function; even without call frame michael@0: // information, the other debugging information could be perfectly michael@0: // useful. michael@0: info->LoadedSection(".debug_frame"); michael@0: bool result = michael@0: LoadDwarfCFI(obj_file, elf_header, ".debug_frame", michael@0: dwarf_cfi_section, false, 0, 0, big_endian, michael@0: module); michael@0: found_usable_info = found_usable_info || result; michael@0: if (result) michael@0: BPLOG(INFO) << "LoadSymbols: read CFI from .debug_frame"; michael@0: } michael@0: michael@0: // Linux C++ exception handling information can also provide michael@0: // unwinding data. michael@0: const Shdr* eh_frame_section = michael@0: FindElfSectionByName(".eh_frame", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (eh_frame_section) { michael@0: // Pointers in .eh_frame data may be relative to the base addresses of michael@0: // certain sections. Provide those sections if present. michael@0: const Shdr* got_section = michael@0: FindElfSectionByName(".got", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: const Shdr* text_section = michael@0: FindElfSectionByName(".text", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: info->LoadedSection(".eh_frame"); michael@0: // As above, ignore the return value of this function. michael@0: bool result = michael@0: LoadDwarfCFI(obj_file, elf_header, ".eh_frame", michael@0: eh_frame_section, true, michael@0: got_section, text_section, big_endian, module); michael@0: found_usable_info = found_usable_info || result; michael@0: if (result) michael@0: BPLOG(INFO) << "LoadSymbols: read CFI from .eh_frame"; michael@0: } michael@0: } michael@0: michael@0: // ARM has special unwind tables that can be used. michael@0: const Shdr* arm_exidx_section = michael@0: FindElfSectionByName(".ARM.exidx", SHT_ARM_EXIDX, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: const Shdr* arm_extab_section = michael@0: FindElfSectionByName(".ARM.extab", SHT_PROGBITS, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: // Only load information from this section if there isn't a .debug_info michael@0: // section. michael@0: if (!found_debug_info_section michael@0: && arm_exidx_section && arm_extab_section && symbol_data != NO_CFI) { michael@0: info->LoadedSection(".ARM.exidx"); michael@0: info->LoadedSection(".ARM.extab"); michael@0: bool result = LoadARMexidx(elf_header, michael@0: arm_exidx_section, arm_extab_section, michael@0: loading_addr, module); michael@0: found_usable_info = found_usable_info || result; michael@0: if (result) michael@0: BPLOG(INFO) << "LoadSymbols: read EXIDX from .ARM.{exidx,extab}"; michael@0: } michael@0: michael@0: if (!found_debug_info_section && symbol_data != ONLY_CFI) { michael@0: fprintf(stderr, "%s: file contains no debugging information" michael@0: " (no \".stab\" or \".debug_info\" sections)\n", michael@0: obj_file.c_str()); michael@0: michael@0: // Failed, but maybe there's a .gnu_debuglink section? michael@0: if (read_gnu_debug_link) { michael@0: const Shdr* gnu_debuglink_section michael@0: = FindElfSectionByName(".gnu_debuglink", SHT_PROGBITS, michael@0: sections, names, michael@0: names_end, elf_header->e_shnum); michael@0: if (gnu_debuglink_section) { michael@0: if (!info->debug_dirs().empty()) { michael@0: const char* debuglink_contents = michael@0: GetOffset(elf_header, michael@0: gnu_debuglink_section->sh_offset); michael@0: string debuglink_file michael@0: = ReadDebugLink(debuglink_contents, michael@0: gnu_debuglink_section->sh_size, michael@0: obj_file, info->debug_dirs()); michael@0: info->set_debuglink_file(debuglink_file); michael@0: } else { michael@0: fprintf(stderr, ".gnu_debuglink section found in '%s', " michael@0: "but no debug path specified.\n", obj_file.c_str()); michael@0: } michael@0: } else { michael@0: fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n", michael@0: obj_file.c_str()); michael@0: } michael@0: } else { michael@0: if (symbol_data != ONLY_CFI) { michael@0: // The caller doesn't want to consult .gnu_debuglink. michael@0: // See if there are export symbols available. michael@0: const Shdr* dynsym_section = michael@0: FindElfSectionByName(".dynsym", SHT_DYNSYM, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: const Shdr* dynstr_section = michael@0: FindElfSectionByName(".dynstr", SHT_STRTAB, michael@0: sections, names, names_end, michael@0: elf_header->e_shnum); michael@0: if (dynsym_section && dynstr_section) { michael@0: info->LoadedSection(".dynsym"); michael@0: michael@0: const uint8_t* dynsyms = michael@0: GetOffset(elf_header, michael@0: dynsym_section->sh_offset); michael@0: const uint8_t* dynstrs = michael@0: GetOffset(elf_header, michael@0: dynstr_section->sh_offset); michael@0: bool result = michael@0: ELFSymbolsToModule(dynsyms, michael@0: dynsym_section->sh_size, michael@0: dynstrs, michael@0: dynstr_section->sh_size, michael@0: big_endian, michael@0: ElfClass::kAddrSize, michael@0: module); michael@0: found_usable_info = found_usable_info || result; michael@0: } michael@0: } michael@0: michael@0: // Return true if some usable information was found, since michael@0: // the caller doesn't want to use .gnu_debuglink. michael@0: BPLOG(INFO) << "LoadSymbols: " michael@0: << (found_usable_info ? "SUCCESS " : "FAILURE ") michael@0: << obj_file; michael@0: return found_usable_info; michael@0: } michael@0: michael@0: // No debug info was found, let the user try again with .gnu_debuglink michael@0: // if present. michael@0: BPLOG(INFO) << "LoadSymbols: FAILURE " << obj_file; michael@0: return false; michael@0: } michael@0: michael@0: BPLOG(INFO) << "LoadSymbols: SUCCESS " << obj_file; michael@0: return true; michael@0: } michael@0: michael@0: // Return the breakpad symbol file identifier for the architecture of michael@0: // ELF_HEADER. michael@0: template michael@0: const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { michael@0: typedef typename ElfClass::Half Half; michael@0: Half arch = elf_header->e_machine; michael@0: switch (arch) { michael@0: case EM_386: return "x86"; michael@0: case EM_ARM: return "arm"; michael@0: case EM_MIPS: return "mips"; michael@0: case EM_PPC64: return "ppc64"; michael@0: case EM_PPC: return "ppc"; michael@0: case EM_S390: return "s390"; michael@0: case EM_SPARC: return "sparc"; michael@0: case EM_SPARCV9: return "sparcv9"; michael@0: case EM_X86_64: return "x86_64"; michael@0: default: return NULL; michael@0: } michael@0: } michael@0: michael@0: // Format the Elf file identifier in IDENTIFIER as a UUID with the michael@0: // dashes removed. michael@0: string FormatIdentifier(unsigned char identifier[16]) { michael@0: char identifier_str[40]; michael@0: google_breakpad::FileID::ConvertIdentifierToString( michael@0: identifier, michael@0: identifier_str, michael@0: sizeof(identifier_str)); michael@0: string id_no_dash; michael@0: for (int i = 0; identifier_str[i] != '\0'; ++i) michael@0: if (identifier_str[i] != '-') michael@0: id_no_dash += identifier_str[i]; michael@0: // Add an extra "0" by the end. PDB files on Windows have an 'age' michael@0: // number appended to the end of the file identifier; this isn't michael@0: // really used or necessary on other platforms, but be consistent. michael@0: id_no_dash += '0'; michael@0: return id_no_dash; michael@0: } michael@0: michael@0: // Return the non-directory portion of FILENAME: the portion after the michael@0: // last slash, or the whole filename if there are no slashes. michael@0: string BaseFileName(const string &filename) { michael@0: // Lots of copies! basename's behavior is less than ideal. michael@0: char *c_filename = strdup(filename.c_str()); michael@0: string base = basename(c_filename); michael@0: free(c_filename); michael@0: return base; michael@0: } michael@0: michael@0: template michael@0: bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, michael@0: const string& obj_filename, michael@0: const std::vector& debug_dirs, michael@0: SymbolData symbol_data, michael@0: Module** out_module) { michael@0: typedef typename ElfClass::Ehdr Ehdr; michael@0: typedef typename ElfClass::Shdr Shdr; michael@0: michael@0: *out_module = NULL; michael@0: michael@0: unsigned char identifier[16]; michael@0: if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header, michael@0: identifier)) { michael@0: fprintf(stderr, "%s: unable to generate file identifier\n", michael@0: obj_filename.c_str()); michael@0: return false; michael@0: } michael@0: michael@0: const char *architecture = ElfArchitecture(elf_header); michael@0: if (!architecture) { michael@0: fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", michael@0: obj_filename.c_str(), elf_header->e_machine); michael@0: return false; michael@0: } michael@0: michael@0: // Figure out what endianness this file is. michael@0: bool big_endian; michael@0: if (!ElfEndianness(elf_header, &big_endian)) michael@0: return false; michael@0: michael@0: string name = BaseFileName(obj_filename); michael@0: string os = "Linux"; michael@0: string id = FormatIdentifier(identifier); michael@0: michael@0: LoadSymbolsInfo info(debug_dirs); michael@0: scoped_ptr module(new Module(name, os, architecture, id)); michael@0: if (!LoadSymbols(obj_filename, big_endian, elf_header, michael@0: !debug_dirs.empty(), &info, michael@0: symbol_data, module.get())) { michael@0: const string debuglink_file = info.debuglink_file(); michael@0: if (debuglink_file.empty()) michael@0: return false; michael@0: michael@0: // Load debuglink ELF file. michael@0: fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); michael@0: MmapWrapper debug_map_wrapper; michael@0: Ehdr* debug_elf_header = NULL; michael@0: if (!LoadELF(debuglink_file, &debug_map_wrapper, michael@0: reinterpret_cast(&debug_elf_header))) michael@0: return false; michael@0: // Sanity checks to make sure everything matches up. michael@0: const char *debug_architecture = michael@0: ElfArchitecture(debug_elf_header); michael@0: if (!debug_architecture) { michael@0: fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", michael@0: debuglink_file.c_str(), debug_elf_header->e_machine); michael@0: return false; michael@0: } michael@0: if (strcmp(architecture, debug_architecture)) { michael@0: fprintf(stderr, "%s with ELF machine architecture %s does not match " michael@0: "%s with ELF architecture %s\n", michael@0: debuglink_file.c_str(), debug_architecture, michael@0: obj_filename.c_str(), architecture); michael@0: return false; michael@0: } michael@0: michael@0: bool debug_big_endian; michael@0: if (!ElfEndianness(debug_elf_header, &debug_big_endian)) michael@0: return false; michael@0: if (debug_big_endian != big_endian) { michael@0: fprintf(stderr, "%s and %s does not match in endianness\n", michael@0: obj_filename.c_str(), debuglink_file.c_str()); michael@0: return false; michael@0: } michael@0: michael@0: if (!LoadSymbols(debuglink_file, debug_big_endian, michael@0: debug_elf_header, false, &info, michael@0: symbol_data, module.get())) { michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: *out_module = module.release(); michael@0: return true; michael@0: } michael@0: michael@0: } // namespace michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: // Not explicitly exported, but not static so it can be used in unit tests. michael@0: bool ReadSymbolDataInternal(const uint8_t* obj_file, michael@0: const string& obj_filename, michael@0: const std::vector& debug_dirs, michael@0: SymbolData symbol_data, michael@0: Module** module) { michael@0: michael@0: if (!IsValidElf(obj_file)) { michael@0: fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); michael@0: return false; michael@0: } michael@0: michael@0: int elfclass = ElfClass(obj_file); michael@0: if (elfclass == ELFCLASS32) { michael@0: return ReadSymbolDataElfClass( michael@0: reinterpret_cast(obj_file), obj_filename, debug_dirs, michael@0: symbol_data, module); michael@0: } michael@0: if (elfclass == ELFCLASS64) { michael@0: return ReadSymbolDataElfClass( michael@0: reinterpret_cast(obj_file), obj_filename, debug_dirs, michael@0: symbol_data, module); michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool WriteSymbolFile(const string &obj_file, michael@0: const std::vector& debug_dirs, michael@0: SymbolData symbol_data, michael@0: std::ostream &sym_stream) { michael@0: Module* module; michael@0: if (!ReadSymbolData(obj_file, debug_dirs, symbol_data, &module)) michael@0: return false; michael@0: michael@0: bool result = module->Write(sym_stream, symbol_data); michael@0: delete module; michael@0: return result; michael@0: } michael@0: michael@0: bool ReadSymbolData(const string& obj_file, michael@0: const std::vector& debug_dirs, michael@0: SymbolData symbol_data, michael@0: Module** module) { michael@0: MmapWrapper map_wrapper; michael@0: void* elf_header = NULL; michael@0: if (!LoadELF(obj_file, &map_wrapper, &elf_header)) michael@0: return false; michael@0: michael@0: return ReadSymbolDataInternal(reinterpret_cast(elf_header), michael@0: obj_file, debug_dirs, symbol_data, module); michael@0: } michael@0: michael@0: } // namespace google_breakpad