1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/profiler/LulElf.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1006 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 + 1.7 +// Copyright (c) 2006, 2011, 2012 Google Inc. 1.8 +// All rights reserved. 1.9 +// 1.10 +// Redistribution and use in source and binary forms, with or without 1.11 +// modification, are permitted provided that the following conditions are 1.12 +// met: 1.13 +// 1.14 +// * Redistributions of source code must retain the above copyright 1.15 +// notice, this list of conditions and the following disclaimer. 1.16 +// * Redistributions in binary form must reproduce the above 1.17 +// copyright notice, this list of conditions and the following disclaimer 1.18 +// in the documentation and/or other materials provided with the 1.19 +// distribution. 1.20 +// * Neither the name of Google Inc. nor the names of its 1.21 +// contributors may be used to endorse or promote products derived from 1.22 +// this software without specific prior written permission. 1.23 +// 1.24 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.25 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.26 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.27 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.28 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.29 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.30 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.31 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.32 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.33 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.34 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.35 + 1.36 +// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.37 + 1.38 +// (derived from) 1.39 +// dump_symbols.cc: implement google_breakpad::WriteSymbolFile: 1.40 +// Find all the debugging info in a file and dump it as a Breakpad symbol file. 1.41 +// 1.42 +// dump_symbols.h: Read debugging information from an ELF file, and write 1.43 +// it out as a Breakpad symbol file. 1.44 + 1.45 +// This file is derived from the following files in 1.46 +// toolkit/crashreporter/google-breakpad: 1.47 +// src/common/linux/dump_symbols.cc 1.48 +// src/common/linux/elfutils.cc 1.49 +// src/common/linux/file_id.cc 1.50 + 1.51 +#include <errno.h> 1.52 +#include <fcntl.h> 1.53 +#include <stdio.h> 1.54 +#include <string.h> 1.55 +#include <sys/mman.h> 1.56 +#include <sys/stat.h> 1.57 +#include <unistd.h> 1.58 +#include <arpa/inet.h> 1.59 + 1.60 +#include <set> 1.61 +#include <string> 1.62 +#include <vector> 1.63 + 1.64 +#include "mozilla/Assertions.h" 1.65 + 1.66 +#include "LulPlatformMacros.h" 1.67 +#include "LulCommonExt.h" 1.68 +#include "LulDwarfExt.h" 1.69 +#if defined(LUL_PLAT_arm_android) 1.70 +# include "LulExidxExt.h" 1.71 +#endif 1.72 +#include "LulElfInt.h" 1.73 +#include "LulMainInt.h" 1.74 + 1.75 + 1.76 +#if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) 1.77 +// bionic and older glibsc don't define it 1.78 +# define SHT_ARM_EXIDX (SHT_LOPROC + 1) 1.79 +#endif 1.80 + 1.81 + 1.82 +// This namespace contains helper functions. 1.83 +namespace { 1.84 + 1.85 +using lul::DwarfCFIToModule; 1.86 +using lul::FindElfSectionByName; 1.87 +using lul::GetOffset; 1.88 +using lul::IsValidElf; 1.89 +using lul::Module; 1.90 +using lul::UniqueString; 1.91 +using lul::scoped_ptr; 1.92 +using lul::Summariser; 1.93 +using std::string; 1.94 +using std::vector; 1.95 +using std::set; 1.96 + 1.97 +// 1.98 +// FDWrapper 1.99 +// 1.100 +// Wrapper class to make sure opened file is closed. 1.101 +// 1.102 +class FDWrapper { 1.103 + public: 1.104 + explicit FDWrapper(int fd) : 1.105 + fd_(fd) {} 1.106 + ~FDWrapper() { 1.107 + if (fd_ != -1) 1.108 + close(fd_); 1.109 + } 1.110 + int get() { 1.111 + return fd_; 1.112 + } 1.113 + int release() { 1.114 + int fd = fd_; 1.115 + fd_ = -1; 1.116 + return fd; 1.117 + } 1.118 + private: 1.119 + int fd_; 1.120 +}; 1.121 + 1.122 +// 1.123 +// MmapWrapper 1.124 +// 1.125 +// Wrapper class to make sure mapped regions are unmapped. 1.126 +// 1.127 +class MmapWrapper { 1.128 + public: 1.129 + MmapWrapper() : is_set_(false) {} 1.130 + ~MmapWrapper() { 1.131 + if (is_set_ && base_ != NULL) { 1.132 + MOZ_ASSERT(size_ > 0); 1.133 + munmap(base_, size_); 1.134 + } 1.135 + } 1.136 + void set(void *mapped_address, size_t mapped_size) { 1.137 + is_set_ = true; 1.138 + base_ = mapped_address; 1.139 + size_ = mapped_size; 1.140 + } 1.141 + void release() { 1.142 + MOZ_ASSERT(is_set_); 1.143 + is_set_ = false; 1.144 + base_ = NULL; 1.145 + size_ = 0; 1.146 + } 1.147 + 1.148 + private: 1.149 + bool is_set_; 1.150 + void *base_; 1.151 + size_t size_; 1.152 +}; 1.153 + 1.154 + 1.155 +// Set NUM_DW_REGNAMES to be the number of Dwarf register names 1.156 +// appropriate to the machine architecture given in HEADER. Return 1.157 +// true on success, or false if HEADER's machine architecture is not 1.158 +// supported. 1.159 +template<typename ElfClass> 1.160 +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, 1.161 + unsigned int* num_dw_regnames) { 1.162 + switch (elf_header->e_machine) { 1.163 + case EM_386: 1.164 + *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); 1.165 + return true; 1.166 + case EM_ARM: 1.167 + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); 1.168 + return true; 1.169 + case EM_X86_64: 1.170 + *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); 1.171 + return true; 1.172 + default: 1.173 + MOZ_ASSERT(0); 1.174 + return false; 1.175 + } 1.176 +} 1.177 + 1.178 +template<typename ElfClass> 1.179 +bool LoadDwarfCFI(const string& dwarf_filename, 1.180 + const typename ElfClass::Ehdr* elf_header, 1.181 + const char* section_name, 1.182 + const typename ElfClass::Shdr* section, 1.183 + const bool eh_frame, 1.184 + const typename ElfClass::Shdr* got_section, 1.185 + const typename ElfClass::Shdr* text_section, 1.186 + const bool big_endian, 1.187 + SecMap* smap, 1.188 + uintptr_t text_bias, 1.189 + void (*log)(const char*)) { 1.190 + // Find the appropriate set of register names for this file's 1.191 + // architecture. 1.192 + unsigned int num_dw_regs = 0; 1.193 + if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) { 1.194 + fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" 1.195 + " cannot convert DWARF call frame information\n", 1.196 + dwarf_filename.c_str(), elf_header->e_machine); 1.197 + return false; 1.198 + } 1.199 + 1.200 + const lul::Endianness endianness 1.201 + = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; 1.202 + 1.203 + // Find the call frame information and its size. 1.204 + const char* cfi = 1.205 + GetOffset<ElfClass, char>(elf_header, section->sh_offset); 1.206 + size_t cfi_size = section->sh_size; 1.207 + 1.208 + // Plug together the parser, handler, and their entourages. 1.209 + 1.210 + // Here's a summariser, which will receive the output of the 1.211 + // parser, create summaries, and add them to |smap|. 1.212 + Summariser* summ = new Summariser(smap, text_bias, log); 1.213 + 1.214 + DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); 1.215 + DwarfCFIToModule handler(num_dw_regs, &module_reporter, summ); 1.216 + lul::ByteReader byte_reader(endianness); 1.217 + 1.218 + byte_reader.SetAddressSize(ElfClass::kAddrSize); 1.219 + 1.220 + // Provide the base addresses for .eh_frame encoded pointers, if 1.221 + // possible. 1.222 + byte_reader.SetCFIDataBase(section->sh_addr, cfi); 1.223 + if (got_section) 1.224 + byte_reader.SetDataBase(got_section->sh_addr); 1.225 + if (text_section) 1.226 + byte_reader.SetTextBase(text_section->sh_addr); 1.227 + 1.228 + lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, 1.229 + section_name); 1.230 + lul::CallFrameInfo parser(cfi, cfi_size, 1.231 + &byte_reader, &handler, &dwarf_reporter, 1.232 + eh_frame); 1.233 + parser.Start(); 1.234 + 1.235 + delete summ; 1.236 + return true; 1.237 +} 1.238 + 1.239 +#if defined(LUL_PLAT_arm_android) 1.240 +template<typename ElfClass> 1.241 +bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, 1.242 + const typename ElfClass::Shdr* exidx_section, 1.243 + const typename ElfClass::Shdr* extab_section, 1.244 + uint32_t loading_addr, 1.245 + uintptr_t text_bias, 1.246 + SecMap* smap, 1.247 + void (*log)(const char*)) { 1.248 + // To do this properly we need to know: 1.249 + // * the bounds of the .ARM.exidx section in the mapped image 1.250 + // * the bounds of the .ARM.extab section in the mapped image 1.251 + // * the vma of the last byte in the text section associated with the .exidx 1.252 + // The first two are easy. The third is a bit tricky. If we can't 1.253 + // figure out what it is, just pass in zero. 1.254 + // Note that we are reading EXIDX directly out of the mapped in 1.255 + // executable image. Unlike with the CFI reader, there is no 1.256 + // auxiliary, temporary mapping used to read the unwind data. 1.257 + // 1.258 + // An .exidx section is always required, but the .extab section 1.259 + // can be optionally omitted, provided that .exidx does not refer 1.260 + // to it. If the .exidx is erroneous and does refer to .extab even 1.261 + // though .extab is missing, the range checks done by GET_EX_U32 in 1.262 + // ExceptionTableInfo::ExtabEntryExtract should prevent any invalid 1.263 + // memory accesses, and cause the .extab to be rejected as invalid. 1.264 + const char *exidx_img 1.265 + = GetOffset<ElfClass, char>(elf_header, exidx_section->sh_offset); 1.266 + size_t exidx_size = exidx_section->sh_size; 1.267 + const char *extab_img 1.268 + = extab_section 1.269 + ? GetOffset<ElfClass, char>(elf_header, extab_section->sh_offset) 1.270 + : nullptr; 1.271 + size_t extab_size = extab_section ? extab_section->sh_size : 0; 1.272 + 1.273 + // The sh_link field of the exidx section gives the section number 1.274 + // for the associated text section. 1.275 + uint32_t exidx_text_last_svma = 0; 1.276 + int exidx_text_sno = exidx_section->sh_link; 1.277 + typedef typename ElfClass::Shdr Shdr; 1.278 + // |sections| points to the section header table 1.279 + const Shdr* sections 1.280 + = GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); 1.281 + const int num_sections = elf_header->e_shnum; 1.282 + if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { 1.283 + const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; 1.284 + if (exidx_text_shdr->sh_size > 0) { 1.285 + exidx_text_last_svma 1.286 + = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; 1.287 + } 1.288 + } 1.289 + 1.290 + lul::ARMExToModule handler(smap, log); 1.291 + lul::ExceptionTableInfo 1.292 + parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma, 1.293 + &handler, 1.294 + reinterpret_cast<const char*>(elf_header), 1.295 + loading_addr, text_bias, log); 1.296 + parser.Start(); 1.297 + return true; 1.298 +} 1.299 +#endif /* defined(LUL_PLAT_arm_android) */ 1.300 + 1.301 +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, 1.302 + void** elf_header) { 1.303 + int obj_fd = open(obj_file.c_str(), O_RDONLY); 1.304 + if (obj_fd < 0) { 1.305 + fprintf(stderr, "Failed to open ELF file '%s': %s\n", 1.306 + obj_file.c_str(), strerror(errno)); 1.307 + return false; 1.308 + } 1.309 + FDWrapper obj_fd_wrapper(obj_fd); 1.310 + struct stat st; 1.311 + if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { 1.312 + fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", 1.313 + obj_file.c_str(), strerror(errno)); 1.314 + return false; 1.315 + } 1.316 + // Mapping it read-only is good enough. In any case, mapping it 1.317 + // read-write confuses Valgrind's debuginfo acquire/discard 1.318 + // heuristics, making it hard to profile the profiler. 1.319 + void *obj_base = mmap(nullptr, st.st_size, 1.320 + PROT_READ, MAP_PRIVATE, obj_fd, 0); 1.321 + if (obj_base == MAP_FAILED) { 1.322 + fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", 1.323 + obj_file.c_str(), strerror(errno)); 1.324 + return false; 1.325 + } 1.326 + map_wrapper->set(obj_base, st.st_size); 1.327 + *elf_header = obj_base; 1.328 + if (!IsValidElf(*elf_header)) { 1.329 + fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); 1.330 + return false; 1.331 + } 1.332 + return true; 1.333 +} 1.334 + 1.335 +// Get the endianness of ELF_HEADER. If it's invalid, return false. 1.336 +template<typename ElfClass> 1.337 +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, 1.338 + bool* big_endian) { 1.339 + if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { 1.340 + *big_endian = false; 1.341 + return true; 1.342 + } 1.343 + if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { 1.344 + *big_endian = true; 1.345 + return true; 1.346 + } 1.347 + 1.348 + fprintf(stderr, "bad data encoding in ELF header: %d\n", 1.349 + elf_header->e_ident[EI_DATA]); 1.350 + return false; 1.351 +} 1.352 + 1.353 +// 1.354 +// LoadSymbolsInfo 1.355 +// 1.356 +// Holds the state between the two calls to LoadSymbols() in case it's necessary 1.357 +// to follow the .gnu_debuglink section and load debug information from a 1.358 +// different file. 1.359 +// 1.360 +template<typename ElfClass> 1.361 +class LoadSymbolsInfo { 1.362 + public: 1.363 + typedef typename ElfClass::Addr Addr; 1.364 + 1.365 + explicit LoadSymbolsInfo(const vector<string>& dbg_dirs) : 1.366 + debug_dirs_(dbg_dirs), 1.367 + has_loading_addr_(false) {} 1.368 + 1.369 + // Keeps track of which sections have been loaded so sections don't 1.370 + // accidentally get loaded twice from two different files. 1.371 + void LoadedSection(const string §ion) { 1.372 + if (loaded_sections_.count(section) == 0) { 1.373 + loaded_sections_.insert(section); 1.374 + } else { 1.375 + fprintf(stderr, "Section %s has already been loaded.\n", 1.376 + section.c_str()); 1.377 + } 1.378 + } 1.379 + 1.380 + string debuglink_file() const { 1.381 + return debuglink_file_; 1.382 + } 1.383 + 1.384 + private: 1.385 + const vector<string>& debug_dirs_; // Directories in which to 1.386 + // search for the debug ELF file. 1.387 + 1.388 + string debuglink_file_; // Full path to the debug ELF file. 1.389 + 1.390 + bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. 1.391 + 1.392 + set<string> loaded_sections_; // Tracks the Loaded ELF sections 1.393 + // between calls to LoadSymbols(). 1.394 +}; 1.395 + 1.396 +// Find the preferred loading address of the binary. 1.397 +template<typename ElfClass> 1.398 +typename ElfClass::Addr GetLoadingAddress( 1.399 + const typename ElfClass::Phdr* program_headers, 1.400 + int nheader) { 1.401 + typedef typename ElfClass::Phdr Phdr; 1.402 + 1.403 + // For non-PIC executables (e_type == ET_EXEC), the load address is 1.404 + // the start address of the first PT_LOAD segment. (ELF requires 1.405 + // the segments to be sorted by load address.) For PIC executables 1.406 + // and dynamic libraries (e_type == ET_DYN), this address will 1.407 + // normally be zero. 1.408 + for (int i = 0; i < nheader; ++i) { 1.409 + const Phdr& header = program_headers[i]; 1.410 + if (header.p_type == PT_LOAD) 1.411 + return header.p_vaddr; 1.412 + } 1.413 + return 0; 1.414 +} 1.415 + 1.416 +template<typename ElfClass> 1.417 +bool LoadSymbols(const string& obj_file, 1.418 + const bool big_endian, 1.419 + const typename ElfClass::Ehdr* elf_header, 1.420 + const bool read_gnu_debug_link, 1.421 + LoadSymbolsInfo<ElfClass>* info, 1.422 + SecMap* smap, 1.423 + void* rx_avma, 1.424 + void (*log)(const char*)) { 1.425 + typedef typename ElfClass::Phdr Phdr; 1.426 + typedef typename ElfClass::Shdr Shdr; 1.427 + 1.428 + char buf[500]; 1.429 + snprintf(buf, sizeof(buf), "LoadSymbols: BEGIN %s\n", obj_file.c_str()); 1.430 + buf[sizeof(buf)-1] = 0; 1.431 + log(buf); 1.432 + 1.433 + // This is how the text bias is calculated. 1.434 + // BEGIN CALCULATE BIAS 1.435 + uintptr_t loading_addr = GetLoadingAddress<ElfClass>( 1.436 + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), 1.437 + elf_header->e_phnum); 1.438 + uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; 1.439 + snprintf(buf, sizeof(buf), 1.440 + "LoadSymbols: rx_avma=%llx, text_bias=%llx", 1.441 + (unsigned long long int)(uintptr_t)rx_avma, 1.442 + (unsigned long long int)text_bias); 1.443 + buf[sizeof(buf)-1] = 0; 1.444 + log(buf); 1.445 + // END CALCULATE BIAS 1.446 + 1.447 + const Shdr* sections = 1.448 + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); 1.449 + const Shdr* section_names = sections + elf_header->e_shstrndx; 1.450 + const char* names = 1.451 + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); 1.452 + const char *names_end = names + section_names->sh_size; 1.453 + bool found_usable_info = false; 1.454 + 1.455 + // Dwarf Call Frame Information (CFI) is actually independent from 1.456 + // the other DWARF debugging information, and can be used alone. 1.457 + const Shdr* dwarf_cfi_section = 1.458 + FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, 1.459 + sections, names, names_end, 1.460 + elf_header->e_shnum); 1.461 + if (dwarf_cfi_section) { 1.462 + // Ignore the return value of this function; even without call frame 1.463 + // information, the other debugging information could be perfectly 1.464 + // useful. 1.465 + info->LoadedSection(".debug_frame"); 1.466 + bool result = 1.467 + LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", 1.468 + dwarf_cfi_section, false, 0, 0, big_endian, 1.469 + smap, text_bias, log); 1.470 + found_usable_info = found_usable_info || result; 1.471 + if (result) 1.472 + log("LoadSymbols: read CFI from .debug_frame"); 1.473 + } 1.474 + 1.475 + // Linux C++ exception handling information can also provide 1.476 + // unwinding data. 1.477 + const Shdr* eh_frame_section = 1.478 + FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, 1.479 + sections, names, names_end, 1.480 + elf_header->e_shnum); 1.481 + if (eh_frame_section) { 1.482 + // Pointers in .eh_frame data may be relative to the base addresses of 1.483 + // certain sections. Provide those sections if present. 1.484 + const Shdr* got_section = 1.485 + FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS, 1.486 + sections, names, names_end, 1.487 + elf_header->e_shnum); 1.488 + const Shdr* text_section = 1.489 + FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS, 1.490 + sections, names, names_end, 1.491 + elf_header->e_shnum); 1.492 + info->LoadedSection(".eh_frame"); 1.493 + // As above, ignore the return value of this function. 1.494 + bool result = 1.495 + LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame", 1.496 + eh_frame_section, true, 1.497 + got_section, text_section, big_endian, 1.498 + smap, text_bias, log); 1.499 + found_usable_info = found_usable_info || result; 1.500 + if (result) 1.501 + log("LoadSymbols: read CFI from .eh_frame"); 1.502 + } 1.503 + 1.504 +# if defined(LUL_PLAT_arm_android) 1.505 + // ARM has special unwind tables that can be used. .exidx is 1.506 + // always required, and .extab is normally required, but may 1.507 + // be omitted if it is empty. See comments on LoadARMexidx() 1.508 + // for more details. 1.509 + const Shdr* arm_exidx_section = 1.510 + FindElfSectionByName<ElfClass>(".ARM.exidx", SHT_ARM_EXIDX, 1.511 + sections, names, names_end, 1.512 + elf_header->e_shnum); 1.513 + const Shdr* arm_extab_section = 1.514 + FindElfSectionByName<ElfClass>(".ARM.extab", SHT_PROGBITS, 1.515 + sections, names, names_end, 1.516 + elf_header->e_shnum); 1.517 + const Shdr* debug_info_section = 1.518 + FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS, 1.519 + sections, names, names_end, 1.520 + elf_header->e_shnum); 1.521 + // Only load information from this section if there isn't a .debug_info 1.522 + // section. 1.523 + if (!debug_info_section && arm_exidx_section) { 1.524 + info->LoadedSection(".ARM.exidx"); 1.525 + if (arm_extab_section) 1.526 + info->LoadedSection(".ARM.extab"); 1.527 + bool result = LoadARMexidx<ElfClass>(elf_header, 1.528 + arm_exidx_section, arm_extab_section, 1.529 + loading_addr, text_bias, smap, log); 1.530 + found_usable_info = found_usable_info || result; 1.531 + if (result) 1.532 + log("LoadSymbols: read EXIDX from .ARM.{exidx,extab}"); 1.533 + } 1.534 +# endif /* defined(LUL_PLAT_arm_android) */ 1.535 + 1.536 + snprintf(buf, sizeof(buf), "LoadSymbols: END %s\n", obj_file.c_str()); 1.537 + buf[sizeof(buf)-1] = 0; 1.538 + log(buf); 1.539 + 1.540 + return found_usable_info; 1.541 +} 1.542 + 1.543 +// Return the breakpad symbol file identifier for the architecture of 1.544 +// ELF_HEADER. 1.545 +template<typename ElfClass> 1.546 +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { 1.547 + typedef typename ElfClass::Half Half; 1.548 + Half arch = elf_header->e_machine; 1.549 + switch (arch) { 1.550 + case EM_386: return "x86"; 1.551 + case EM_ARM: return "arm"; 1.552 + case EM_MIPS: return "mips"; 1.553 + case EM_PPC64: return "ppc64"; 1.554 + case EM_PPC: return "ppc"; 1.555 + case EM_S390: return "s390"; 1.556 + case EM_SPARC: return "sparc"; 1.557 + case EM_SPARCV9: return "sparcv9"; 1.558 + case EM_X86_64: return "x86_64"; 1.559 + default: return NULL; 1.560 + } 1.561 +} 1.562 + 1.563 +// Format the Elf file identifier in IDENTIFIER as a UUID with the 1.564 +// dashes removed. 1.565 +string FormatIdentifier(unsigned char identifier[16]) { 1.566 + char identifier_str[40]; 1.567 + lul::FileID::ConvertIdentifierToString( 1.568 + identifier, 1.569 + identifier_str, 1.570 + sizeof(identifier_str)); 1.571 + string id_no_dash; 1.572 + for (int i = 0; identifier_str[i] != '\0'; ++i) 1.573 + if (identifier_str[i] != '-') 1.574 + id_no_dash += identifier_str[i]; 1.575 + // Add an extra "0" by the end. PDB files on Windows have an 'age' 1.576 + // number appended to the end of the file identifier; this isn't 1.577 + // really used or necessary on other platforms, but be consistent. 1.578 + id_no_dash += '0'; 1.579 + return id_no_dash; 1.580 +} 1.581 + 1.582 +// Return the non-directory portion of FILENAME: the portion after the 1.583 +// last slash, or the whole filename if there are no slashes. 1.584 +string BaseFileName(const string &filename) { 1.585 + // Lots of copies! basename's behavior is less than ideal. 1.586 + char *c_filename = strdup(filename.c_str()); 1.587 + string base = basename(c_filename); 1.588 + free(c_filename); 1.589 + return base; 1.590 +} 1.591 + 1.592 +template<typename ElfClass> 1.593 +bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, 1.594 + const string& obj_filename, 1.595 + const vector<string>& debug_dirs, 1.596 + SecMap* smap, void* rx_avma, 1.597 + void (*log)(const char*)) { 1.598 + typedef typename ElfClass::Ehdr Ehdr; 1.599 + 1.600 + unsigned char identifier[16]; 1.601 + if (!lul 1.602 + ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { 1.603 + fprintf(stderr, "%s: unable to generate file identifier\n", 1.604 + obj_filename.c_str()); 1.605 + return false; 1.606 + } 1.607 + 1.608 + const char *architecture = ElfArchitecture<ElfClass>(elf_header); 1.609 + if (!architecture) { 1.610 + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", 1.611 + obj_filename.c_str(), elf_header->e_machine); 1.612 + return false; 1.613 + } 1.614 + 1.615 + // Figure out what endianness this file is. 1.616 + bool big_endian; 1.617 + if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) 1.618 + return false; 1.619 + 1.620 + string name = BaseFileName(obj_filename); 1.621 + string os = "Linux"; 1.622 + string id = FormatIdentifier(identifier); 1.623 + 1.624 + LoadSymbolsInfo<ElfClass> info(debug_dirs); 1.625 + if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, 1.626 + !debug_dirs.empty(), &info, 1.627 + smap, rx_avma, log)) { 1.628 + const string debuglink_file = info.debuglink_file(); 1.629 + if (debuglink_file.empty()) 1.630 + return false; 1.631 + 1.632 + // Load debuglink ELF file. 1.633 + fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); 1.634 + MmapWrapper debug_map_wrapper; 1.635 + Ehdr* debug_elf_header = NULL; 1.636 + if (!LoadELF(debuglink_file, &debug_map_wrapper, 1.637 + reinterpret_cast<void**>(&debug_elf_header))) 1.638 + return false; 1.639 + // Sanity checks to make sure everything matches up. 1.640 + const char *debug_architecture = 1.641 + ElfArchitecture<ElfClass>(debug_elf_header); 1.642 + if (!debug_architecture) { 1.643 + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", 1.644 + debuglink_file.c_str(), debug_elf_header->e_machine); 1.645 + return false; 1.646 + } 1.647 + if (strcmp(architecture, debug_architecture)) { 1.648 + fprintf(stderr, "%s with ELF machine architecture %s does not match " 1.649 + "%s with ELF architecture %s\n", 1.650 + debuglink_file.c_str(), debug_architecture, 1.651 + obj_filename.c_str(), architecture); 1.652 + return false; 1.653 + } 1.654 + 1.655 + bool debug_big_endian; 1.656 + if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) 1.657 + return false; 1.658 + if (debug_big_endian != big_endian) { 1.659 + fprintf(stderr, "%s and %s does not match in endianness\n", 1.660 + obj_filename.c_str(), debuglink_file.c_str()); 1.661 + return false; 1.662 + } 1.663 + 1.664 + if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, 1.665 + debug_elf_header, false, &info, 1.666 + smap, rx_avma, log)) { 1.667 + return false; 1.668 + } 1.669 + } 1.670 + 1.671 + return true; 1.672 +} 1.673 + 1.674 +} // namespace (anon) 1.675 + 1.676 + 1.677 +namespace lul { 1.678 + 1.679 +bool ReadSymbolDataInternal(const uint8_t* obj_file, 1.680 + const string& obj_filename, 1.681 + const vector<string>& debug_dirs, 1.682 + SecMap* smap, void* rx_avma, 1.683 + void (*log)(const char*)) { 1.684 + 1.685 + if (!IsValidElf(obj_file)) { 1.686 + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); 1.687 + return false; 1.688 + } 1.689 + 1.690 + int elfclass = ElfClass(obj_file); 1.691 + if (elfclass == ELFCLASS32) { 1.692 + return ReadSymbolDataElfClass<ElfClass32>( 1.693 + reinterpret_cast<const Elf32_Ehdr*>(obj_file), 1.694 + obj_filename, debug_dirs, smap, rx_avma, log); 1.695 + } 1.696 + if (elfclass == ELFCLASS64) { 1.697 + return ReadSymbolDataElfClass<ElfClass64>( 1.698 + reinterpret_cast<const Elf64_Ehdr*>(obj_file), 1.699 + obj_filename, debug_dirs, smap, rx_avma, log); 1.700 + } 1.701 + 1.702 + return false; 1.703 +} 1.704 + 1.705 +bool ReadSymbolData(const string& obj_file, 1.706 + const vector<string>& debug_dirs, 1.707 + SecMap* smap, void* rx_avma, 1.708 + void (*log)(const char*)) { 1.709 + MmapWrapper map_wrapper; 1.710 + void* elf_header = NULL; 1.711 + if (!LoadELF(obj_file, &map_wrapper, &elf_header)) 1.712 + return false; 1.713 + 1.714 + return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), 1.715 + obj_file, debug_dirs, smap, rx_avma, log); 1.716 +} 1.717 + 1.718 + 1.719 +namespace { 1.720 + 1.721 +template<typename ElfClass> 1.722 +void FindElfClassSection(const char *elf_base, 1.723 + const char *section_name, 1.724 + typename ElfClass::Word section_type, 1.725 + const void **section_start, 1.726 + int *section_size) { 1.727 + typedef typename ElfClass::Ehdr Ehdr; 1.728 + typedef typename ElfClass::Shdr Shdr; 1.729 + 1.730 + MOZ_ASSERT(elf_base); 1.731 + MOZ_ASSERT(section_start); 1.732 + MOZ_ASSERT(section_size); 1.733 + 1.734 + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); 1.735 + 1.736 + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); 1.737 + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); 1.738 + 1.739 + const Shdr* sections = 1.740 + GetOffset<ElfClass,Shdr>(elf_header, elf_header->e_shoff); 1.741 + const Shdr* section_names = sections + elf_header->e_shstrndx; 1.742 + const char* names = 1.743 + GetOffset<ElfClass,char>(elf_header, section_names->sh_offset); 1.744 + const char *names_end = names + section_names->sh_size; 1.745 + 1.746 + const Shdr* section = 1.747 + FindElfSectionByName<ElfClass>(section_name, section_type, 1.748 + sections, names, names_end, 1.749 + elf_header->e_shnum); 1.750 + 1.751 + if (section != NULL && section->sh_size > 0) { 1.752 + *section_start = elf_base + section->sh_offset; 1.753 + *section_size = section->sh_size; 1.754 + } 1.755 +} 1.756 + 1.757 +template<typename ElfClass> 1.758 +void FindElfClassSegment(const char *elf_base, 1.759 + typename ElfClass::Word segment_type, 1.760 + const void **segment_start, 1.761 + int *segment_size) { 1.762 + typedef typename ElfClass::Ehdr Ehdr; 1.763 + typedef typename ElfClass::Phdr Phdr; 1.764 + 1.765 + MOZ_ASSERT(elf_base); 1.766 + MOZ_ASSERT(segment_start); 1.767 + MOZ_ASSERT(segment_size); 1.768 + 1.769 + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); 1.770 + 1.771 + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); 1.772 + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); 1.773 + 1.774 + const Phdr* phdrs = 1.775 + GetOffset<ElfClass,Phdr>(elf_header, elf_header->e_phoff); 1.776 + 1.777 + for (int i = 0; i < elf_header->e_phnum; ++i) { 1.778 + if (phdrs[i].p_type == segment_type) { 1.779 + *segment_start = elf_base + phdrs[i].p_offset; 1.780 + *segment_size = phdrs[i].p_filesz; 1.781 + return; 1.782 + } 1.783 + } 1.784 +} 1.785 + 1.786 +} // namespace (anon) 1.787 + 1.788 +bool IsValidElf(const void* elf_base) { 1.789 + return strncmp(reinterpret_cast<const char*>(elf_base), 1.790 + ELFMAG, SELFMAG) == 0; 1.791 +} 1.792 + 1.793 +int ElfClass(const void* elf_base) { 1.794 + const ElfW(Ehdr)* elf_header = 1.795 + reinterpret_cast<const ElfW(Ehdr)*>(elf_base); 1.796 + 1.797 + return elf_header->e_ident[EI_CLASS]; 1.798 +} 1.799 + 1.800 +bool FindElfSection(const void *elf_mapped_base, 1.801 + const char *section_name, 1.802 + uint32_t section_type, 1.803 + const void **section_start, 1.804 + int *section_size, 1.805 + int *elfclass) { 1.806 + MOZ_ASSERT(elf_mapped_base); 1.807 + MOZ_ASSERT(section_start); 1.808 + MOZ_ASSERT(section_size); 1.809 + 1.810 + *section_start = NULL; 1.811 + *section_size = 0; 1.812 + 1.813 + if (!IsValidElf(elf_mapped_base)) 1.814 + return false; 1.815 + 1.816 + int cls = ElfClass(elf_mapped_base); 1.817 + if (elfclass) { 1.818 + *elfclass = cls; 1.819 + } 1.820 + 1.821 + const char* elf_base = 1.822 + static_cast<const char*>(elf_mapped_base); 1.823 + 1.824 + if (cls == ELFCLASS32) { 1.825 + FindElfClassSection<ElfClass32>(elf_base, section_name, section_type, 1.826 + section_start, section_size); 1.827 + return *section_start != NULL; 1.828 + } else if (cls == ELFCLASS64) { 1.829 + FindElfClassSection<ElfClass64>(elf_base, section_name, section_type, 1.830 + section_start, section_size); 1.831 + return *section_start != NULL; 1.832 + } 1.833 + 1.834 + return false; 1.835 +} 1.836 + 1.837 +bool FindElfSegment(const void *elf_mapped_base, 1.838 + uint32_t segment_type, 1.839 + const void **segment_start, 1.840 + int *segment_size, 1.841 + int *elfclass) { 1.842 + MOZ_ASSERT(elf_mapped_base); 1.843 + MOZ_ASSERT(segment_start); 1.844 + MOZ_ASSERT(segment_size); 1.845 + 1.846 + *segment_start = NULL; 1.847 + *segment_size = 0; 1.848 + 1.849 + if (!IsValidElf(elf_mapped_base)) 1.850 + return false; 1.851 + 1.852 + int cls = ElfClass(elf_mapped_base); 1.853 + if (elfclass) { 1.854 + *elfclass = cls; 1.855 + } 1.856 + 1.857 + const char* elf_base = 1.858 + static_cast<const char*>(elf_mapped_base); 1.859 + 1.860 + if (cls == ELFCLASS32) { 1.861 + FindElfClassSegment<ElfClass32>(elf_base, segment_type, 1.862 + segment_start, segment_size); 1.863 + return *segment_start != NULL; 1.864 + } else if (cls == ELFCLASS64) { 1.865 + FindElfClassSegment<ElfClass64>(elf_base, segment_type, 1.866 + segment_start, segment_size); 1.867 + return *segment_start != NULL; 1.868 + } 1.869 + 1.870 + return false; 1.871 +} 1.872 + 1.873 + 1.874 +// (derived from) 1.875 +// file_id.cc: Return a unique identifier for a file 1.876 +// 1.877 +// See file_id.h for documentation 1.878 +// 1.879 + 1.880 +// ELF note name and desc are 32-bits word padded. 1.881 +#define NOTE_PADDING(a) ((a + 3) & ~3) 1.882 + 1.883 +// These functions are also used inside the crashed process, so be safe 1.884 +// and use the syscall/libc wrappers instead of direct syscalls or libc. 1.885 + 1.886 +template<typename ElfClass> 1.887 +static bool ElfClassBuildIDNoteIdentifier(const void *section, int length, 1.888 + uint8_t identifier[kMDGUIDSize]) { 1.889 + typedef typename ElfClass::Nhdr Nhdr; 1.890 + 1.891 + const void* section_end = reinterpret_cast<const char*>(section) + length; 1.892 + const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); 1.893 + while (reinterpret_cast<const void *>(note_header) < section_end) { 1.894 + if (note_header->n_type == NT_GNU_BUILD_ID) 1.895 + break; 1.896 + note_header = reinterpret_cast<const Nhdr*>( 1.897 + reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + 1.898 + NOTE_PADDING(note_header->n_namesz) + 1.899 + NOTE_PADDING(note_header->n_descsz)); 1.900 + } 1.901 + if (reinterpret_cast<const void *>(note_header) >= section_end || 1.902 + note_header->n_descsz == 0) { 1.903 + return false; 1.904 + } 1.905 + 1.906 + const char* build_id = reinterpret_cast<const char*>(note_header) + 1.907 + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); 1.908 + // Copy as many bits of the build ID as will fit 1.909 + // into the GUID space. 1.910 + memset(identifier, 0, kMDGUIDSize); 1.911 + memcpy(identifier, build_id, 1.912 + std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); 1.913 + 1.914 + return true; 1.915 +} 1.916 + 1.917 +// Attempt to locate a .note.gnu.build-id section in an ELF binary 1.918 +// and copy as many bytes of it as will fit into |identifier|. 1.919 +static bool FindElfBuildIDNote(const void *elf_mapped_base, 1.920 + uint8_t identifier[kMDGUIDSize]) { 1.921 + void* note_section; 1.922 + int note_size, elfclass; 1.923 + if ((!FindElfSegment(elf_mapped_base, PT_NOTE, 1.924 + (const void**)¬e_section, ¬e_size, &elfclass) || 1.925 + note_size == 0) && 1.926 + (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, 1.927 + (const void**)¬e_section, ¬e_size, &elfclass) || 1.928 + note_size == 0)) { 1.929 + return false; 1.930 + } 1.931 + 1.932 + if (elfclass == ELFCLASS32) { 1.933 + return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size, 1.934 + identifier); 1.935 + } else if (elfclass == ELFCLASS64) { 1.936 + return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size, 1.937 + identifier); 1.938 + } 1.939 + 1.940 + return false; 1.941 +} 1.942 + 1.943 +// Attempt to locate the .text section of an ELF binary and generate 1.944 +// a simple hash by XORing the first page worth of bytes into |identifier|. 1.945 +static bool HashElfTextSection(const void *elf_mapped_base, 1.946 + uint8_t identifier[kMDGUIDSize]) { 1.947 + void* text_section; 1.948 + int text_size; 1.949 + if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, 1.950 + (const void**)&text_section, &text_size, NULL) || 1.951 + text_size == 0) { 1.952 + return false; 1.953 + } 1.954 + 1.955 + memset(identifier, 0, kMDGUIDSize); 1.956 + const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); 1.957 + const uint8_t* ptr_end = ptr + std::min(text_size, 4096); 1.958 + while (ptr < ptr_end) { 1.959 + for (unsigned i = 0; i < kMDGUIDSize; i++) 1.960 + identifier[i] ^= ptr[i]; 1.961 + ptr += kMDGUIDSize; 1.962 + } 1.963 + return true; 1.964 +} 1.965 + 1.966 +// static 1.967 +bool FileID::ElfFileIdentifierFromMappedFile(const void* base, 1.968 + uint8_t identifier[kMDGUIDSize]) { 1.969 + // Look for a build id note first. 1.970 + if (FindElfBuildIDNote(base, identifier)) 1.971 + return true; 1.972 + 1.973 + // Fall back on hashing the first page of the text section. 1.974 + return HashElfTextSection(base, identifier); 1.975 +} 1.976 + 1.977 +// static 1.978 +void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], 1.979 + char* buffer, int buffer_length) { 1.980 + uint8_t identifier_swapped[kMDGUIDSize]; 1.981 + 1.982 + // Endian-ness swap to match dump processor expectation. 1.983 + memcpy(identifier_swapped, identifier, kMDGUIDSize); 1.984 + uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); 1.985 + *data1 = htonl(*data1); 1.986 + uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); 1.987 + *data2 = htons(*data2); 1.988 + uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); 1.989 + *data3 = htons(*data3); 1.990 + 1.991 + int buffer_idx = 0; 1.992 + for (unsigned int idx = 0; 1.993 + (buffer_idx < buffer_length) && (idx < kMDGUIDSize); 1.994 + ++idx) { 1.995 + int hi = (identifier_swapped[idx] >> 4) & 0x0F; 1.996 + int lo = (identifier_swapped[idx]) & 0x0F; 1.997 + 1.998 + if (idx == 4 || idx == 6 || idx == 8 || idx == 10) 1.999 + buffer[buffer_idx++] = '-'; 1.1000 + 1.1001 + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; 1.1002 + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; 1.1003 + } 1.1004 + 1.1005 + // NULL terminate 1.1006 + buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; 1.1007 +} 1.1008 + 1.1009 +} // namespace lul