1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/linux/dump_symbols.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,985 @@ 1.4 +// Copyright (c) 2011 Google Inc. 1.5 +// All rights reserved. 1.6 +// 1.7 +// Redistribution and use in source and binary forms, with or without 1.8 +// modification, are permitted provided that the following conditions are 1.9 +// met: 1.10 +// 1.11 +// * Redistributions of source code must retain the above copyright 1.12 +// notice, this list of conditions and the following disclaimer. 1.13 +// * Redistributions in binary form must reproduce the above 1.14 +// copyright notice, this list of conditions and the following disclaimer 1.15 +// in the documentation and/or other materials provided with the 1.16 +// distribution. 1.17 +// * Neither the name of Google Inc. nor the names of its 1.18 +// contributors may be used to endorse or promote products derived from 1.19 +// this software without specific prior written permission. 1.20 +// 1.21 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.22 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.23 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.24 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.25 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.26 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.27 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.28 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.29 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.30 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.31 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.32 + 1.33 +// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.34 + 1.35 +// dump_symbols.cc: implement google_breakpad::WriteSymbolFile: 1.36 +// Find all the debugging info in a file and dump it as a Breakpad symbol file. 1.37 + 1.38 +#include "common/linux/dump_symbols.h" 1.39 + 1.40 +#include <assert.h> 1.41 +#include <elf.h> 1.42 +#include <errno.h> 1.43 +#include <fcntl.h> 1.44 +#include <link.h> 1.45 +#include <stdio.h> 1.46 +#include <stdlib.h> 1.47 +#include <string.h> 1.48 +#include <sys/mman.h> 1.49 +#include <sys/stat.h> 1.50 +#include <unistd.h> 1.51 + 1.52 +#include <iostream> 1.53 +#include <set> 1.54 +#include <string> 1.55 +#include <utility> 1.56 +#include <vector> 1.57 + 1.58 +#include "common/arm_ex_reader.h" 1.59 +#include "common/dwarf/bytereader-inl.h" 1.60 +#include "common/dwarf/dwarf2diehandler.h" 1.61 +#include "common/dwarf_cfi_to_module.h" 1.62 +#include "common/dwarf_cu_to_module.h" 1.63 +#include "common/dwarf_line_to_module.h" 1.64 +#include "common/linux/elfutils.h" 1.65 +#include "common/linux/elfutils-inl.h" 1.66 +#include "common/linux/elf_symbols_to_module.h" 1.67 +#include "common/linux/file_id.h" 1.68 +#include "common/module.h" 1.69 +#include "common/scoped_ptr.h" 1.70 +#ifndef NO_STABS_SUPPORT 1.71 +#include "common/stabs_reader.h" 1.72 +#include "common/stabs_to_module.h" 1.73 +#endif 1.74 +#include "common/using_std_string.h" 1.75 +#include "common/logging.h" 1.76 + 1.77 +#ifndef SHT_ARM_EXIDX 1.78 +// bionic and older glibc don't define it 1.79 +# define SHT_ARM_EXIDX (SHT_LOPROC + 1) 1.80 +#endif 1.81 + 1.82 +// This namespace contains helper functions. 1.83 +namespace { 1.84 + 1.85 +using google_breakpad::DwarfCFIToModule; 1.86 +using google_breakpad::DwarfCUToModule; 1.87 +using google_breakpad::DwarfLineToModule; 1.88 +using google_breakpad::ElfClass; 1.89 +using google_breakpad::ElfClass32; 1.90 +using google_breakpad::ElfClass64; 1.91 +using google_breakpad::FindElfSectionByName; 1.92 +using google_breakpad::GetOffset; 1.93 +using google_breakpad::IsValidElf; 1.94 +using google_breakpad::Module; 1.95 +#ifndef NO_STABS_SUPPORT 1.96 +using google_breakpad::StabsToModule; 1.97 +#endif 1.98 +using google_breakpad::UniqueString; 1.99 +using google_breakpad::scoped_ptr; 1.100 + 1.101 +// 1.102 +// FDWrapper 1.103 +// 1.104 +// Wrapper class to make sure opened file is closed. 1.105 +// 1.106 +class FDWrapper { 1.107 + public: 1.108 + explicit FDWrapper(int fd) : 1.109 + fd_(fd) {} 1.110 + ~FDWrapper() { 1.111 + if (fd_ != -1) 1.112 + close(fd_); 1.113 + } 1.114 + int get() { 1.115 + return fd_; 1.116 + } 1.117 + int release() { 1.118 + int fd = fd_; 1.119 + fd_ = -1; 1.120 + return fd; 1.121 + } 1.122 + private: 1.123 + int fd_; 1.124 +}; 1.125 + 1.126 +// 1.127 +// MmapWrapper 1.128 +// 1.129 +// Wrapper class to make sure mapped regions are unmapped. 1.130 +// 1.131 +class MmapWrapper { 1.132 + public: 1.133 + MmapWrapper() : is_set_(false) {} 1.134 + ~MmapWrapper() { 1.135 + if (is_set_ && base_ != NULL) { 1.136 + assert(size_ > 0); 1.137 + munmap(base_, size_); 1.138 + } 1.139 + } 1.140 + void set(void *mapped_address, size_t mapped_size) { 1.141 + is_set_ = true; 1.142 + base_ = mapped_address; 1.143 + size_ = mapped_size; 1.144 + } 1.145 + void release() { 1.146 + assert(is_set_); 1.147 + is_set_ = false; 1.148 + base_ = NULL; 1.149 + size_ = 0; 1.150 + } 1.151 + 1.152 + private: 1.153 + bool is_set_; 1.154 + void *base_; 1.155 + size_t size_; 1.156 +}; 1.157 + 1.158 +// Find the preferred loading address of the binary. 1.159 +template<typename ElfClass> 1.160 +typename ElfClass::Addr GetLoadingAddress( 1.161 + const typename ElfClass::Phdr* program_headers, 1.162 + int nheader) { 1.163 + typedef typename ElfClass::Phdr Phdr; 1.164 + 1.165 + for (int i = 0; i < nheader; ++i) { 1.166 + const Phdr& header = program_headers[i]; 1.167 + // For executable, it is the PT_LOAD segment with offset to zero. 1.168 + if (header.p_type == PT_LOAD && 1.169 + header.p_offset == 0) 1.170 + return header.p_vaddr; 1.171 + } 1.172 + // For other types of ELF, return 0. 1.173 + return 0; 1.174 +} 1.175 + 1.176 +#ifndef NO_STABS_SUPPORT 1.177 +template<typename ElfClass> 1.178 +bool LoadStabs(const typename ElfClass::Ehdr* elf_header, 1.179 + const typename ElfClass::Shdr* stab_section, 1.180 + const typename ElfClass::Shdr* stabstr_section, 1.181 + const bool big_endian, 1.182 + Module* module) { 1.183 + // A callback object to handle data from the STABS reader. 1.184 + StabsToModule handler(module); 1.185 + // Find the addresses of the STABS data, and create a STABS reader object. 1.186 + // On Linux, STABS entries always have 32-bit values, regardless of the 1.187 + // address size of the architecture whose code they're describing, and 1.188 + // the strings are always "unitized". 1.189 + const uint8_t* stabs = 1.190 + GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset); 1.191 + const uint8_t* stabstr = 1.192 + GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset); 1.193 + google_breakpad::StabsReader reader(stabs, stab_section->sh_size, 1.194 + stabstr, stabstr_section->sh_size, 1.195 + big_endian, 4, true, &handler); 1.196 + // Read the STABS data, and do post-processing. 1.197 + if (!reader.Process()) 1.198 + return false; 1.199 + handler.Finalize(); 1.200 + return true; 1.201 +} 1.202 +#endif // NO_STABS_SUPPORT 1.203 + 1.204 +// A line-to-module loader that accepts line number info parsed by 1.205 +// dwarf2reader::LineInfo and populates a Module and a line vector 1.206 +// with the results. 1.207 +class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler { 1.208 + public: 1.209 + // Create a line-to-module converter using BYTE_READER. 1.210 + explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader) 1.211 + : byte_reader_(byte_reader) { } 1.212 + void StartCompilationUnit(const string& compilation_dir) { 1.213 + compilation_dir_ = compilation_dir; 1.214 + } 1.215 + void ReadProgram(const char *program, uint64 length, 1.216 + Module *module, std::vector<Module::Line> *lines) { 1.217 + DwarfLineToModule handler(module, compilation_dir_, lines); 1.218 + dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); 1.219 + parser.Start(); 1.220 + } 1.221 + private: 1.222 + string compilation_dir_; 1.223 + dwarf2reader::ByteReader *byte_reader_; 1.224 +}; 1.225 + 1.226 +template<typename ElfClass> 1.227 +bool LoadDwarf(const string& dwarf_filename, 1.228 + const typename ElfClass::Ehdr* elf_header, 1.229 + const bool big_endian, 1.230 + Module* module) { 1.231 + typedef typename ElfClass::Shdr Shdr; 1.232 + 1.233 + const dwarf2reader::Endianness endianness = big_endian ? 1.234 + dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; 1.235 + dwarf2reader::ByteReader byte_reader(endianness); 1.236 + 1.237 + // Construct a context for this file. 1.238 + DwarfCUToModule::FileContext file_context(dwarf_filename, module); 1.239 + 1.240 + // Build a map of the ELF file's sections. 1.241 + const Shdr* sections = 1.242 + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); 1.243 + int num_sections = elf_header->e_shnum; 1.244 + const Shdr* section_names = sections + elf_header->e_shstrndx; 1.245 + for (int i = 0; i < num_sections; i++) { 1.246 + const Shdr* section = §ions[i]; 1.247 + string name = GetOffset<ElfClass, char>(elf_header, 1.248 + section_names->sh_offset) + 1.249 + section->sh_name; 1.250 + const char* contents = GetOffset<ElfClass, char>(elf_header, 1.251 + section->sh_offset); 1.252 + uint64 length = section->sh_size; 1.253 + file_context.section_map[name] = std::make_pair(contents, length); 1.254 + } 1.255 + 1.256 + // Parse all the compilation units in the .debug_info section. 1.257 + DumperLineToModule line_to_module(&byte_reader); 1.258 + std::pair<const char *, uint64> debug_info_section 1.259 + = file_context.section_map[".debug_info"]; 1.260 + // This should never have been called if the file doesn't have a 1.261 + // .debug_info section. 1.262 + assert(debug_info_section.first); 1.263 + uint64 debug_info_length = debug_info_section.second; 1.264 + for (uint64 offset = 0; offset < debug_info_length;) { 1.265 + // Make a handler for the root DIE that populates MODULE with the 1.266 + // data that was found. 1.267 + DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset); 1.268 + DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter); 1.269 + // Make a Dwarf2Handler that drives the DIEHandler. 1.270 + dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); 1.271 + // Make a DWARF parser for the compilation unit at OFFSET. 1.272 + dwarf2reader::CompilationUnit reader(file_context.section_map, 1.273 + offset, 1.274 + &byte_reader, 1.275 + &die_dispatcher); 1.276 + // Process the entire compilation unit; get the offset of the next. 1.277 + offset += reader.Start(); 1.278 + } 1.279 + return true; 1.280 +} 1.281 + 1.282 +// Fill REGISTER_NAMES with the register names appropriate to the 1.283 +// machine architecture given in HEADER, indexed by the register 1.284 +// numbers used in DWARF call frame information. Return true on 1.285 +// success, or false if HEADER's machine architecture is not 1.286 +// supported. 1.287 +template<typename ElfClass> 1.288 +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, 1.289 + std::vector<const UniqueString*>* register_names) { 1.290 + switch (elf_header->e_machine) { 1.291 + case EM_386: 1.292 + *register_names = DwarfCFIToModule::RegisterNames::I386(); 1.293 + return true; 1.294 + case EM_ARM: 1.295 + *register_names = DwarfCFIToModule::RegisterNames::ARM(); 1.296 + return true; 1.297 + case EM_X86_64: 1.298 + *register_names = DwarfCFIToModule::RegisterNames::X86_64(); 1.299 + return true; 1.300 + default: 1.301 + return false; 1.302 + } 1.303 +} 1.304 + 1.305 +template<typename ElfClass> 1.306 +bool LoadDwarfCFI(const string& dwarf_filename, 1.307 + const typename ElfClass::Ehdr* elf_header, 1.308 + const char* section_name, 1.309 + const typename ElfClass::Shdr* section, 1.310 + const bool eh_frame, 1.311 + const typename ElfClass::Shdr* got_section, 1.312 + const typename ElfClass::Shdr* text_section, 1.313 + const bool big_endian, 1.314 + Module* module) { 1.315 + // Find the appropriate set of register names for this file's 1.316 + // architecture. 1.317 + std::vector<const UniqueString*> register_names; 1.318 + if (!DwarfCFIRegisterNames<ElfClass>(elf_header, ®ister_names)) { 1.319 + fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" 1.320 + " cannot convert DWARF call frame information\n", 1.321 + dwarf_filename.c_str(), elf_header->e_machine); 1.322 + return false; 1.323 + } 1.324 + 1.325 + const dwarf2reader::Endianness endianness = big_endian ? 1.326 + dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; 1.327 + 1.328 + // Find the call frame information and its size. 1.329 + const char* cfi = 1.330 + GetOffset<ElfClass, char>(elf_header, section->sh_offset); 1.331 + size_t cfi_size = section->sh_size; 1.332 + 1.333 + // Plug together the parser, handler, and their entourages. 1.334 + DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name); 1.335 + DwarfCFIToModule handler(module, register_names, &module_reporter); 1.336 + dwarf2reader::ByteReader byte_reader(endianness); 1.337 + 1.338 + byte_reader.SetAddressSize(ElfClass::kAddrSize); 1.339 + 1.340 + // Provide the base addresses for .eh_frame encoded pointers, if 1.341 + // possible. 1.342 + byte_reader.SetCFIDataBase(section->sh_addr, cfi); 1.343 + if (got_section) 1.344 + byte_reader.SetDataBase(got_section->sh_addr); 1.345 + if (text_section) 1.346 + byte_reader.SetTextBase(text_section->sh_addr); 1.347 + 1.348 + dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename, 1.349 + section_name); 1.350 + dwarf2reader::CallFrameInfo parser(cfi, cfi_size, 1.351 + &byte_reader, &handler, &dwarf_reporter, 1.352 + eh_frame); 1.353 + parser.Start(); 1.354 + return true; 1.355 +} 1.356 + 1.357 +template<typename ElfClass> 1.358 +bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, 1.359 + const typename ElfClass::Shdr* exidx_section, 1.360 + const typename ElfClass::Shdr* extab_section, 1.361 + uint32_t loading_addr, 1.362 + Module* module) { 1.363 + // To do this properly we need to know: 1.364 + // * the bounds of the .ARM.exidx section in the mapped image 1.365 + // * the bounds of the .ARM.extab section in the mapped image 1.366 + // * the vma of the last byte in the text section associated with the .exidx 1.367 + // The first two are easy. The third is a bit tricky. If we can't 1.368 + // figure out what it is, just pass in zero. 1.369 + const char *exidx_img 1.370 + = GetOffset<ElfClass, char>(elf_header, exidx_section->sh_offset); 1.371 + size_t exidx_size = exidx_section->sh_size; 1.372 + const char *extab_img 1.373 + = GetOffset<ElfClass, char>(elf_header, extab_section->sh_offset); 1.374 + size_t extab_size = extab_section->sh_size; 1.375 + 1.376 + // The sh_link field of the exidx section gives the section number 1.377 + // for the associated text section. 1.378 + uint32_t exidx_text_last_svma = 0; 1.379 + int exidx_text_sno = exidx_section->sh_link; 1.380 + typedef typename ElfClass::Shdr Shdr; 1.381 + // |sections| points to the section header table 1.382 + const Shdr* sections 1.383 + = GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); 1.384 + const int num_sections = elf_header->e_shnum; 1.385 + if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { 1.386 + const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; 1.387 + if (exidx_text_shdr->sh_size > 0) { 1.388 + exidx_text_last_svma 1.389 + = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; 1.390 + } 1.391 + } 1.392 + 1.393 + arm_ex_to_module::ARMExToModule handler(module); 1.394 + arm_ex_reader::ExceptionTableInfo 1.395 + parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma, 1.396 + &handler, 1.397 + reinterpret_cast<const char*>(elf_header), 1.398 + loading_addr); 1.399 + parser.Start(); 1.400 + return true; 1.401 +} 1.402 + 1.403 +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, 1.404 + void** elf_header) { 1.405 + int obj_fd = open(obj_file.c_str(), O_RDONLY); 1.406 + if (obj_fd < 0) { 1.407 + fprintf(stderr, "Failed to open ELF file '%s': %s\n", 1.408 + obj_file.c_str(), strerror(errno)); 1.409 + return false; 1.410 + } 1.411 + FDWrapper obj_fd_wrapper(obj_fd); 1.412 + struct stat st; 1.413 + if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { 1.414 + fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", 1.415 + obj_file.c_str(), strerror(errno)); 1.416 + return false; 1.417 + } 1.418 + void *obj_base = mmap(NULL, st.st_size, 1.419 + PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0); 1.420 + if (obj_base == MAP_FAILED) { 1.421 + fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", 1.422 + obj_file.c_str(), strerror(errno)); 1.423 + return false; 1.424 + } 1.425 + map_wrapper->set(obj_base, st.st_size); 1.426 + *elf_header = obj_base; 1.427 + if (!IsValidElf(*elf_header)) { 1.428 + fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); 1.429 + return false; 1.430 + } 1.431 + return true; 1.432 +} 1.433 + 1.434 +// Get the endianness of ELF_HEADER. If it's invalid, return false. 1.435 +template<typename ElfClass> 1.436 +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, 1.437 + bool* big_endian) { 1.438 + if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { 1.439 + *big_endian = false; 1.440 + return true; 1.441 + } 1.442 + if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { 1.443 + *big_endian = true; 1.444 + return true; 1.445 + } 1.446 + 1.447 + fprintf(stderr, "bad data encoding in ELF header: %d\n", 1.448 + elf_header->e_ident[EI_DATA]); 1.449 + return false; 1.450 +} 1.451 + 1.452 +// Read the .gnu_debuglink and get the debug file name. If anything goes 1.453 +// wrong, return an empty string. 1.454 +template<typename ElfClass> 1.455 +string ReadDebugLink(const char* debuglink, 1.456 + size_t debuglink_size, 1.457 + const string& obj_file, 1.458 + const std::vector<string>& debug_dirs) { 1.459 + size_t debuglink_len = strlen(debuglink) + 5; // '\0' + CRC32. 1.460 + debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round to nearest 4 bytes. 1.461 + 1.462 + // Sanity check. 1.463 + if (debuglink_len != debuglink_size) { 1.464 + fprintf(stderr, "Mismatched .gnu_debuglink string / section size: " 1.465 + "%zx %zx\n", debuglink_len, debuglink_size); 1.466 + return ""; 1.467 + } 1.468 + 1.469 + bool found = false; 1.470 + int debuglink_fd = -1; 1.471 + string debuglink_path; 1.472 + std::vector<string>::const_iterator it; 1.473 + for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) { 1.474 + const string& debug_dir = *it; 1.475 + debuglink_path = debug_dir + "/" + debuglink; 1.476 + debuglink_fd = open(debuglink_path.c_str(), O_RDONLY); 1.477 + if (debuglink_fd >= 0) { 1.478 + found = true; 1.479 + break; 1.480 + } 1.481 + } 1.482 + 1.483 + if (!found) { 1.484 + fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n", 1.485 + obj_file.c_str()); 1.486 + for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) { 1.487 + const string debug_dir = *it; 1.488 + fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink); 1.489 + } 1.490 + return ""; 1.491 + } 1.492 + 1.493 + FDWrapper debuglink_fd_wrapper(debuglink_fd); 1.494 + // TODO(thestig) check the CRC-32 at the end of the .gnu_debuglink 1.495 + // section. 1.496 + 1.497 + return debuglink_path; 1.498 +} 1.499 + 1.500 +// 1.501 +// LoadSymbolsInfo 1.502 +// 1.503 +// Holds the state between the two calls to LoadSymbols() in case it's necessary 1.504 +// to follow the .gnu_debuglink section and load debug information from a 1.505 +// different file. 1.506 +// 1.507 +template<typename ElfClass> 1.508 +class LoadSymbolsInfo { 1.509 + public: 1.510 + typedef typename ElfClass::Addr Addr; 1.511 + 1.512 + explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) : 1.513 + debug_dirs_(dbg_dirs), 1.514 + has_loading_addr_(false) {} 1.515 + 1.516 + // Keeps track of which sections have been loaded so sections don't 1.517 + // accidentally get loaded twice from two different files. 1.518 + void LoadedSection(const string §ion) { 1.519 + if (loaded_sections_.count(section) == 0) { 1.520 + loaded_sections_.insert(section); 1.521 + } else { 1.522 + fprintf(stderr, "Section %s has already been loaded.\n", 1.523 + section.c_str()); 1.524 + } 1.525 + } 1.526 + 1.527 + // The ELF file and linked debug file are expected to have the same preferred 1.528 + // loading address. 1.529 + void set_loading_addr(Addr addr, const string &filename) { 1.530 + if (!has_loading_addr_) { 1.531 + loading_addr_ = addr; 1.532 + loaded_file_ = filename; 1.533 + return; 1.534 + } 1.535 + 1.536 + if (addr != loading_addr_) { 1.537 + fprintf(stderr, 1.538 + "ELF file '%s' and debug ELF file '%s' " 1.539 + "have different load addresses.\n", 1.540 + loaded_file_.c_str(), filename.c_str()); 1.541 + assert(false); 1.542 + } 1.543 + } 1.544 + 1.545 + // Setters and getters 1.546 + const std::vector<string>& debug_dirs() const { 1.547 + return debug_dirs_; 1.548 + } 1.549 + 1.550 + string debuglink_file() const { 1.551 + return debuglink_file_; 1.552 + } 1.553 + void set_debuglink_file(string file) { 1.554 + debuglink_file_ = file; 1.555 + } 1.556 + 1.557 + private: 1.558 + const std::vector<string>& debug_dirs_; // Directories in which to 1.559 + // search for the debug ELF file. 1.560 + 1.561 + string debuglink_file_; // Full path to the debug ELF file. 1.562 + 1.563 + bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. 1.564 + 1.565 + Addr loading_addr_; // Saves the preferred loading address from the 1.566 + // first call to LoadSymbols(). 1.567 + 1.568 + string loaded_file_; // Name of the file loaded from the first call to 1.569 + // LoadSymbols(). 1.570 + 1.571 + std::set<string> loaded_sections_; // Tracks the Loaded ELF sections 1.572 + // between calls to LoadSymbols(). 1.573 +}; 1.574 + 1.575 +template<typename ElfClass> 1.576 +bool LoadSymbols(const string& obj_file, 1.577 + const bool big_endian, 1.578 + const typename ElfClass::Ehdr* elf_header, 1.579 + const bool read_gnu_debug_link, 1.580 + LoadSymbolsInfo<ElfClass>* info, 1.581 + SymbolData symbol_data, 1.582 + Module* module) { 1.583 + typedef typename ElfClass::Addr Addr; 1.584 + typedef typename ElfClass::Phdr Phdr; 1.585 + typedef typename ElfClass::Shdr Shdr; 1.586 + 1.587 + BPLOG(INFO) << ""; 1.588 + BPLOG(INFO) << "LoadSymbols: BEGIN " << obj_file; 1.589 + 1.590 + Addr loading_addr = GetLoadingAddress<ElfClass>( 1.591 + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), 1.592 + elf_header->e_phnum); 1.593 + module->SetLoadAddress(loading_addr); 1.594 + info->set_loading_addr(loading_addr, obj_file); 1.595 + 1.596 + const Shdr* sections = 1.597 + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); 1.598 + const Shdr* section_names = sections + elf_header->e_shstrndx; 1.599 + const char* names = 1.600 + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); 1.601 + const char *names_end = names + section_names->sh_size; 1.602 + bool found_debug_info_section = false; 1.603 + bool found_usable_info = false; 1.604 + 1.605 + if (symbol_data != ONLY_CFI) { 1.606 +#ifndef NO_STABS_SUPPORT 1.607 + // Look for STABS debugging information, and load it if present. 1.608 + const Shdr* stab_section = 1.609 + FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS, 1.610 + sections, names, names_end, 1.611 + elf_header->e_shnum); 1.612 + if (stab_section) { 1.613 + const Shdr* stabstr_section = stab_section->sh_link + sections; 1.614 + if (stabstr_section) { 1.615 + found_debug_info_section = true; 1.616 + found_usable_info = true; 1.617 + info->LoadedSection(".stab"); 1.618 + if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section, 1.619 + big_endian, module)) { 1.620 + fprintf(stderr, "%s: \".stab\" section found, but failed to load" 1.621 + " STABS debugging information\n", obj_file.c_str()); 1.622 + } 1.623 + } 1.624 + } 1.625 +#endif // NO_STABS_SUPPORT 1.626 + 1.627 + // Look for DWARF debugging information, and load it if present. 1.628 + const Shdr* dwarf_section = 1.629 + FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS, 1.630 + sections, names, names_end, 1.631 + elf_header->e_shnum); 1.632 + if (dwarf_section) { 1.633 + found_debug_info_section = true; 1.634 + found_usable_info = true; 1.635 + info->LoadedSection(".debug_info"); 1.636 + if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian, module)) 1.637 + fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " 1.638 + "DWARF debugging information\n", obj_file.c_str()); 1.639 + } 1.640 + } 1.641 + 1.642 + if (symbol_data != NO_CFI) { 1.643 + // Dwarf Call Frame Information (CFI) is actually independent from 1.644 + // the other DWARF debugging information, and can be used alone. 1.645 + const Shdr* dwarf_cfi_section = 1.646 + FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, 1.647 + sections, names, names_end, 1.648 + elf_header->e_shnum); 1.649 + if (dwarf_cfi_section) { 1.650 + // Ignore the return value of this function; even without call frame 1.651 + // information, the other debugging information could be perfectly 1.652 + // useful. 1.653 + info->LoadedSection(".debug_frame"); 1.654 + bool result = 1.655 + LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", 1.656 + dwarf_cfi_section, false, 0, 0, big_endian, 1.657 + module); 1.658 + found_usable_info = found_usable_info || result; 1.659 + if (result) 1.660 + BPLOG(INFO) << "LoadSymbols: read CFI from .debug_frame"; 1.661 + } 1.662 + 1.663 + // Linux C++ exception handling information can also provide 1.664 + // unwinding data. 1.665 + const Shdr* eh_frame_section = 1.666 + FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, 1.667 + sections, names, names_end, 1.668 + elf_header->e_shnum); 1.669 + if (eh_frame_section) { 1.670 + // Pointers in .eh_frame data may be relative to the base addresses of 1.671 + // certain sections. Provide those sections if present. 1.672 + const Shdr* got_section = 1.673 + FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS, 1.674 + sections, names, names_end, 1.675 + elf_header->e_shnum); 1.676 + const Shdr* text_section = 1.677 + FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS, 1.678 + sections, names, names_end, 1.679 + elf_header->e_shnum); 1.680 + info->LoadedSection(".eh_frame"); 1.681 + // As above, ignore the return value of this function. 1.682 + bool result = 1.683 + LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame", 1.684 + eh_frame_section, true, 1.685 + got_section, text_section, big_endian, module); 1.686 + found_usable_info = found_usable_info || result; 1.687 + if (result) 1.688 + BPLOG(INFO) << "LoadSymbols: read CFI from .eh_frame"; 1.689 + } 1.690 + } 1.691 + 1.692 + // ARM has special unwind tables that can be used. 1.693 + const Shdr* arm_exidx_section = 1.694 + FindElfSectionByName<ElfClass>(".ARM.exidx", SHT_ARM_EXIDX, 1.695 + sections, names, names_end, 1.696 + elf_header->e_shnum); 1.697 + const Shdr* arm_extab_section = 1.698 + FindElfSectionByName<ElfClass>(".ARM.extab", SHT_PROGBITS, 1.699 + sections, names, names_end, 1.700 + elf_header->e_shnum); 1.701 + // Only load information from this section if there isn't a .debug_info 1.702 + // section. 1.703 + if (!found_debug_info_section 1.704 + && arm_exidx_section && arm_extab_section && symbol_data != NO_CFI) { 1.705 + info->LoadedSection(".ARM.exidx"); 1.706 + info->LoadedSection(".ARM.extab"); 1.707 + bool result = LoadARMexidx<ElfClass>(elf_header, 1.708 + arm_exidx_section, arm_extab_section, 1.709 + loading_addr, module); 1.710 + found_usable_info = found_usable_info || result; 1.711 + if (result) 1.712 + BPLOG(INFO) << "LoadSymbols: read EXIDX from .ARM.{exidx,extab}"; 1.713 + } 1.714 + 1.715 + if (!found_debug_info_section && symbol_data != ONLY_CFI) { 1.716 + fprintf(stderr, "%s: file contains no debugging information" 1.717 + " (no \".stab\" or \".debug_info\" sections)\n", 1.718 + obj_file.c_str()); 1.719 + 1.720 + // Failed, but maybe there's a .gnu_debuglink section? 1.721 + if (read_gnu_debug_link) { 1.722 + const Shdr* gnu_debuglink_section 1.723 + = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS, 1.724 + sections, names, 1.725 + names_end, elf_header->e_shnum); 1.726 + if (gnu_debuglink_section) { 1.727 + if (!info->debug_dirs().empty()) { 1.728 + const char* debuglink_contents = 1.729 + GetOffset<ElfClass, char>(elf_header, 1.730 + gnu_debuglink_section->sh_offset); 1.731 + string debuglink_file 1.732 + = ReadDebugLink<ElfClass>(debuglink_contents, 1.733 + gnu_debuglink_section->sh_size, 1.734 + obj_file, info->debug_dirs()); 1.735 + info->set_debuglink_file(debuglink_file); 1.736 + } else { 1.737 + fprintf(stderr, ".gnu_debuglink section found in '%s', " 1.738 + "but no debug path specified.\n", obj_file.c_str()); 1.739 + } 1.740 + } else { 1.741 + fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n", 1.742 + obj_file.c_str()); 1.743 + } 1.744 + } else { 1.745 + if (symbol_data != ONLY_CFI) { 1.746 + // The caller doesn't want to consult .gnu_debuglink. 1.747 + // See if there are export symbols available. 1.748 + const Shdr* dynsym_section = 1.749 + FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM, 1.750 + sections, names, names_end, 1.751 + elf_header->e_shnum); 1.752 + const Shdr* dynstr_section = 1.753 + FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB, 1.754 + sections, names, names_end, 1.755 + elf_header->e_shnum); 1.756 + if (dynsym_section && dynstr_section) { 1.757 + info->LoadedSection(".dynsym"); 1.758 + 1.759 + const uint8_t* dynsyms = 1.760 + GetOffset<ElfClass, uint8_t>(elf_header, 1.761 + dynsym_section->sh_offset); 1.762 + const uint8_t* dynstrs = 1.763 + GetOffset<ElfClass, uint8_t>(elf_header, 1.764 + dynstr_section->sh_offset); 1.765 + bool result = 1.766 + ELFSymbolsToModule(dynsyms, 1.767 + dynsym_section->sh_size, 1.768 + dynstrs, 1.769 + dynstr_section->sh_size, 1.770 + big_endian, 1.771 + ElfClass::kAddrSize, 1.772 + module); 1.773 + found_usable_info = found_usable_info || result; 1.774 + } 1.775 + } 1.776 + 1.777 + // Return true if some usable information was found, since 1.778 + // the caller doesn't want to use .gnu_debuglink. 1.779 + BPLOG(INFO) << "LoadSymbols: " 1.780 + << (found_usable_info ? "SUCCESS " : "FAILURE ") 1.781 + << obj_file; 1.782 + return found_usable_info; 1.783 + } 1.784 + 1.785 + // No debug info was found, let the user try again with .gnu_debuglink 1.786 + // if present. 1.787 + BPLOG(INFO) << "LoadSymbols: FAILURE " << obj_file; 1.788 + return false; 1.789 + } 1.790 + 1.791 + BPLOG(INFO) << "LoadSymbols: SUCCESS " << obj_file; 1.792 + return true; 1.793 +} 1.794 + 1.795 +// Return the breakpad symbol file identifier for the architecture of 1.796 +// ELF_HEADER. 1.797 +template<typename ElfClass> 1.798 +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { 1.799 + typedef typename ElfClass::Half Half; 1.800 + Half arch = elf_header->e_machine; 1.801 + switch (arch) { 1.802 + case EM_386: return "x86"; 1.803 + case EM_ARM: return "arm"; 1.804 + case EM_MIPS: return "mips"; 1.805 + case EM_PPC64: return "ppc64"; 1.806 + case EM_PPC: return "ppc"; 1.807 + case EM_S390: return "s390"; 1.808 + case EM_SPARC: return "sparc"; 1.809 + case EM_SPARCV9: return "sparcv9"; 1.810 + case EM_X86_64: return "x86_64"; 1.811 + default: return NULL; 1.812 + } 1.813 +} 1.814 + 1.815 +// Format the Elf file identifier in IDENTIFIER as a UUID with the 1.816 +// dashes removed. 1.817 +string FormatIdentifier(unsigned char identifier[16]) { 1.818 + char identifier_str[40]; 1.819 + google_breakpad::FileID::ConvertIdentifierToString( 1.820 + identifier, 1.821 + identifier_str, 1.822 + sizeof(identifier_str)); 1.823 + string id_no_dash; 1.824 + for (int i = 0; identifier_str[i] != '\0'; ++i) 1.825 + if (identifier_str[i] != '-') 1.826 + id_no_dash += identifier_str[i]; 1.827 + // Add an extra "0" by the end. PDB files on Windows have an 'age' 1.828 + // number appended to the end of the file identifier; this isn't 1.829 + // really used or necessary on other platforms, but be consistent. 1.830 + id_no_dash += '0'; 1.831 + return id_no_dash; 1.832 +} 1.833 + 1.834 +// Return the non-directory portion of FILENAME: the portion after the 1.835 +// last slash, or the whole filename if there are no slashes. 1.836 +string BaseFileName(const string &filename) { 1.837 + // Lots of copies! basename's behavior is less than ideal. 1.838 + char *c_filename = strdup(filename.c_str()); 1.839 + string base = basename(c_filename); 1.840 + free(c_filename); 1.841 + return base; 1.842 +} 1.843 + 1.844 +template<typename ElfClass> 1.845 +bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, 1.846 + const string& obj_filename, 1.847 + const std::vector<string>& debug_dirs, 1.848 + SymbolData symbol_data, 1.849 + Module** out_module) { 1.850 + typedef typename ElfClass::Ehdr Ehdr; 1.851 + typedef typename ElfClass::Shdr Shdr; 1.852 + 1.853 + *out_module = NULL; 1.854 + 1.855 + unsigned char identifier[16]; 1.856 + if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header, 1.857 + identifier)) { 1.858 + fprintf(stderr, "%s: unable to generate file identifier\n", 1.859 + obj_filename.c_str()); 1.860 + return false; 1.861 + } 1.862 + 1.863 + const char *architecture = ElfArchitecture<ElfClass>(elf_header); 1.864 + if (!architecture) { 1.865 + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", 1.866 + obj_filename.c_str(), elf_header->e_machine); 1.867 + return false; 1.868 + } 1.869 + 1.870 + // Figure out what endianness this file is. 1.871 + bool big_endian; 1.872 + if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) 1.873 + return false; 1.874 + 1.875 + string name = BaseFileName(obj_filename); 1.876 + string os = "Linux"; 1.877 + string id = FormatIdentifier(identifier); 1.878 + 1.879 + LoadSymbolsInfo<ElfClass> info(debug_dirs); 1.880 + scoped_ptr<Module> module(new Module(name, os, architecture, id)); 1.881 + if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, 1.882 + !debug_dirs.empty(), &info, 1.883 + symbol_data, module.get())) { 1.884 + const string debuglink_file = info.debuglink_file(); 1.885 + if (debuglink_file.empty()) 1.886 + return false; 1.887 + 1.888 + // Load debuglink ELF file. 1.889 + fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); 1.890 + MmapWrapper debug_map_wrapper; 1.891 + Ehdr* debug_elf_header = NULL; 1.892 + if (!LoadELF(debuglink_file, &debug_map_wrapper, 1.893 + reinterpret_cast<void**>(&debug_elf_header))) 1.894 + return false; 1.895 + // Sanity checks to make sure everything matches up. 1.896 + const char *debug_architecture = 1.897 + ElfArchitecture<ElfClass>(debug_elf_header); 1.898 + if (!debug_architecture) { 1.899 + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", 1.900 + debuglink_file.c_str(), debug_elf_header->e_machine); 1.901 + return false; 1.902 + } 1.903 + if (strcmp(architecture, debug_architecture)) { 1.904 + fprintf(stderr, "%s with ELF machine architecture %s does not match " 1.905 + "%s with ELF architecture %s\n", 1.906 + debuglink_file.c_str(), debug_architecture, 1.907 + obj_filename.c_str(), architecture); 1.908 + return false; 1.909 + } 1.910 + 1.911 + bool debug_big_endian; 1.912 + if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) 1.913 + return false; 1.914 + if (debug_big_endian != big_endian) { 1.915 + fprintf(stderr, "%s and %s does not match in endianness\n", 1.916 + obj_filename.c_str(), debuglink_file.c_str()); 1.917 + return false; 1.918 + } 1.919 + 1.920 + if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, 1.921 + debug_elf_header, false, &info, 1.922 + symbol_data, module.get())) { 1.923 + return false; 1.924 + } 1.925 + } 1.926 + 1.927 + *out_module = module.release(); 1.928 + return true; 1.929 +} 1.930 + 1.931 +} // namespace 1.932 + 1.933 +namespace google_breakpad { 1.934 + 1.935 +// Not explicitly exported, but not static so it can be used in unit tests. 1.936 +bool ReadSymbolDataInternal(const uint8_t* obj_file, 1.937 + const string& obj_filename, 1.938 + const std::vector<string>& debug_dirs, 1.939 + SymbolData symbol_data, 1.940 + Module** module) { 1.941 + 1.942 + if (!IsValidElf(obj_file)) { 1.943 + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); 1.944 + return false; 1.945 + } 1.946 + 1.947 + int elfclass = ElfClass(obj_file); 1.948 + if (elfclass == ELFCLASS32) { 1.949 + return ReadSymbolDataElfClass<ElfClass32>( 1.950 + reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs, 1.951 + symbol_data, module); 1.952 + } 1.953 + if (elfclass == ELFCLASS64) { 1.954 + return ReadSymbolDataElfClass<ElfClass64>( 1.955 + reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs, 1.956 + symbol_data, module); 1.957 + } 1.958 + 1.959 + return false; 1.960 +} 1.961 + 1.962 +bool WriteSymbolFile(const string &obj_file, 1.963 + const std::vector<string>& debug_dirs, 1.964 + SymbolData symbol_data, 1.965 + std::ostream &sym_stream) { 1.966 + Module* module; 1.967 + if (!ReadSymbolData(obj_file, debug_dirs, symbol_data, &module)) 1.968 + return false; 1.969 + 1.970 + bool result = module->Write(sym_stream, symbol_data); 1.971 + delete module; 1.972 + return result; 1.973 +} 1.974 + 1.975 +bool ReadSymbolData(const string& obj_file, 1.976 + const std::vector<string>& debug_dirs, 1.977 + SymbolData symbol_data, 1.978 + Module** module) { 1.979 + MmapWrapper map_wrapper; 1.980 + void* elf_header = NULL; 1.981 + if (!LoadELF(obj_file, &map_wrapper, &elf_header)) 1.982 + return false; 1.983 + 1.984 + return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), 1.985 + obj_file, debug_dirs, symbol_data, module); 1.986 +} 1.987 + 1.988 +} // namespace google_breakpad