|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 |
|
4 // Copyright (c) 2006, 2011, 2012 Google Inc. |
|
5 // All rights reserved. |
|
6 // |
|
7 // Redistribution and use in source and binary forms, with or without |
|
8 // modification, are permitted provided that the following conditions are |
|
9 // met: |
|
10 // |
|
11 // * Redistributions of source code must retain the above copyright |
|
12 // notice, this list of conditions and the following disclaimer. |
|
13 // * Redistributions in binary form must reproduce the above |
|
14 // copyright notice, this list of conditions and the following disclaimer |
|
15 // in the documentation and/or other materials provided with the |
|
16 // distribution. |
|
17 // * Neither the name of Google Inc. nor the names of its |
|
18 // contributors may be used to endorse or promote products derived from |
|
19 // this software without specific prior written permission. |
|
20 // |
|
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
32 |
|
33 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
|
34 |
|
35 // (derived from) |
|
36 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile: |
|
37 // Find all the debugging info in a file and dump it as a Breakpad symbol file. |
|
38 // |
|
39 // dump_symbols.h: Read debugging information from an ELF file, and write |
|
40 // it out as a Breakpad symbol file. |
|
41 |
|
42 // This file is derived from the following files in |
|
43 // toolkit/crashreporter/google-breakpad: |
|
44 // src/common/linux/dump_symbols.cc |
|
45 // src/common/linux/elfutils.cc |
|
46 // src/common/linux/file_id.cc |
|
47 |
|
48 #include <errno.h> |
|
49 #include <fcntl.h> |
|
50 #include <stdio.h> |
|
51 #include <string.h> |
|
52 #include <sys/mman.h> |
|
53 #include <sys/stat.h> |
|
54 #include <unistd.h> |
|
55 #include <arpa/inet.h> |
|
56 |
|
57 #include <set> |
|
58 #include <string> |
|
59 #include <vector> |
|
60 |
|
61 #include "mozilla/Assertions.h" |
|
62 |
|
63 #include "LulPlatformMacros.h" |
|
64 #include "LulCommonExt.h" |
|
65 #include "LulDwarfExt.h" |
|
66 #if defined(LUL_PLAT_arm_android) |
|
67 # include "LulExidxExt.h" |
|
68 #endif |
|
69 #include "LulElfInt.h" |
|
70 #include "LulMainInt.h" |
|
71 |
|
72 |
|
73 #if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) |
|
74 // bionic and older glibc don't define it |
|
75 # define SHT_ARM_EXIDX (SHT_LOPROC + 1) |
|
76 #endif |
|
77 |
|
78 |
|
79 // This namespace contains helper functions. |
|
80 namespace { |
|
81 |
|
82 using lul::DwarfCFIToModule; |
|
83 using lul::FindElfSectionByName; |
|
84 using lul::GetOffset; |
|
85 using lul::IsValidElf; |
|
86 using lul::Module; |
|
87 using lul::UniqueString; |
|
88 using lul::scoped_ptr; |
|
89 using lul::Summariser; |
|
90 using std::string; |
|
91 using std::vector; |
|
92 using std::set; |
|
93 |
|
//
// FDWrapper
//
// Scope guard for a file descriptor: the wrapped descriptor is closed
// when the wrapper is destroyed, unless release() was called first.
// A stored value of -1 means "nothing to close".
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) :
    fd_(fd) {}
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Two wrappers holding the same descriptor would both close it, so
  // copying is disallowed.
  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;

  // Return the wrapped descriptor; the wrapper keeps responsibility for
  // closing it.
  int get() const {
    return fd_;
  }

  // Hand the descriptor back to the caller and forget it, so that the
  // destructor no longer closes it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  int fd_;
};
|
118 |
|
119 // |
|
120 // MmapWrapper |
|
121 // |
|
122 // Wrapper class to make sure mapped regions are unmapped. |
|
123 // |
|
124 class MmapWrapper { |
|
125 public: |
|
126 MmapWrapper() : is_set_(false) {} |
|
127 ~MmapWrapper() { |
|
128 if (is_set_ && base_ != NULL) { |
|
129 MOZ_ASSERT(size_ > 0); |
|
130 munmap(base_, size_); |
|
131 } |
|
132 } |
|
133 void set(void *mapped_address, size_t mapped_size) { |
|
134 is_set_ = true; |
|
135 base_ = mapped_address; |
|
136 size_ = mapped_size; |
|
137 } |
|
138 void release() { |
|
139 MOZ_ASSERT(is_set_); |
|
140 is_set_ = false; |
|
141 base_ = NULL; |
|
142 size_ = 0; |
|
143 } |
|
144 |
|
145 private: |
|
146 bool is_set_; |
|
147 void *base_; |
|
148 size_t size_; |
|
149 }; |
|
150 |
|
151 |
|
152 // Set NUM_DW_REGNAMES to be the number of Dwarf register names |
|
153 // appropriate to the machine architecture given in HEADER. Return |
|
154 // true on success, or false if HEADER's machine architecture is not |
|
155 // supported. |
|
156 template<typename ElfClass> |
|
157 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, |
|
158 unsigned int* num_dw_regnames) { |
|
159 switch (elf_header->e_machine) { |
|
160 case EM_386: |
|
161 *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); |
|
162 return true; |
|
163 case EM_ARM: |
|
164 *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); |
|
165 return true; |
|
166 case EM_X86_64: |
|
167 *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); |
|
168 return true; |
|
169 default: |
|
170 MOZ_ASSERT(0); |
|
171 return false; |
|
172 } |
|
173 } |
|
174 |
|
175 template<typename ElfClass> |
|
176 bool LoadDwarfCFI(const string& dwarf_filename, |
|
177 const typename ElfClass::Ehdr* elf_header, |
|
178 const char* section_name, |
|
179 const typename ElfClass::Shdr* section, |
|
180 const bool eh_frame, |
|
181 const typename ElfClass::Shdr* got_section, |
|
182 const typename ElfClass::Shdr* text_section, |
|
183 const bool big_endian, |
|
184 SecMap* smap, |
|
185 uintptr_t text_bias, |
|
186 void (*log)(const char*)) { |
|
187 // Find the appropriate set of register names for this file's |
|
188 // architecture. |
|
189 unsigned int num_dw_regs = 0; |
|
190 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) { |
|
191 fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" |
|
192 " cannot convert DWARF call frame information\n", |
|
193 dwarf_filename.c_str(), elf_header->e_machine); |
|
194 return false; |
|
195 } |
|
196 |
|
197 const lul::Endianness endianness |
|
198 = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; |
|
199 |
|
200 // Find the call frame information and its size. |
|
201 const char* cfi = |
|
202 GetOffset<ElfClass, char>(elf_header, section->sh_offset); |
|
203 size_t cfi_size = section->sh_size; |
|
204 |
|
205 // Plug together the parser, handler, and their entourages. |
|
206 |
|
207 // Here's a summariser, which will receive the output of the |
|
208 // parser, create summaries, and add them to |smap|. |
|
209 Summariser* summ = new Summariser(smap, text_bias, log); |
|
210 |
|
211 DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); |
|
212 DwarfCFIToModule handler(num_dw_regs, &module_reporter, summ); |
|
213 lul::ByteReader byte_reader(endianness); |
|
214 |
|
215 byte_reader.SetAddressSize(ElfClass::kAddrSize); |
|
216 |
|
217 // Provide the base addresses for .eh_frame encoded pointers, if |
|
218 // possible. |
|
219 byte_reader.SetCFIDataBase(section->sh_addr, cfi); |
|
220 if (got_section) |
|
221 byte_reader.SetDataBase(got_section->sh_addr); |
|
222 if (text_section) |
|
223 byte_reader.SetTextBase(text_section->sh_addr); |
|
224 |
|
225 lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, |
|
226 section_name); |
|
227 lul::CallFrameInfo parser(cfi, cfi_size, |
|
228 &byte_reader, &handler, &dwarf_reporter, |
|
229 eh_frame); |
|
230 parser.Start(); |
|
231 |
|
232 delete summ; |
|
233 return true; |
|
234 } |
|
235 |
|
236 #if defined(LUL_PLAT_arm_android) |
|
237 template<typename ElfClass> |
|
238 bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, |
|
239 const typename ElfClass::Shdr* exidx_section, |
|
240 const typename ElfClass::Shdr* extab_section, |
|
241 uint32_t loading_addr, |
|
242 uintptr_t text_bias, |
|
243 SecMap* smap, |
|
244 void (*log)(const char*)) { |
|
245 // To do this properly we need to know: |
|
246 // * the bounds of the .ARM.exidx section in the mapped image |
|
247 // * the bounds of the .ARM.extab section in the mapped image |
|
248 // * the vma of the last byte in the text section associated with the .exidx |
|
249 // The first two are easy. The third is a bit tricky. If we can't |
|
250 // figure out what it is, just pass in zero. |
|
251 // Note that we are reading EXIDX directly out of the mapped in |
|
252 // executable image. Unlike with the CFI reader, there is no |
|
253 // auxiliary, temporary mapping used to read the unwind data. |
|
254 // |
|
255 // An .exidx section is always required, but the .extab section |
|
256 // can be optionally omitted, provided that .exidx does not refer |
|
257 // to it. If the .exidx is erroneous and does refer to .extab even |
|
258 // though .extab is missing, the range checks done by GET_EX_U32 in |
|
259 // ExceptionTableInfo::ExtabEntryExtract should prevent any invalid |
|
260 // memory accesses, and cause the .extab to be rejected as invalid. |
|
261 const char *exidx_img |
|
262 = GetOffset<ElfClass, char>(elf_header, exidx_section->sh_offset); |
|
263 size_t exidx_size = exidx_section->sh_size; |
|
264 const char *extab_img |
|
265 = extab_section |
|
266 ? GetOffset<ElfClass, char>(elf_header, extab_section->sh_offset) |
|
267 : nullptr; |
|
268 size_t extab_size = extab_section ? extab_section->sh_size : 0; |
|
269 |
|
270 // The sh_link field of the exidx section gives the section number |
|
271 // for the associated text section. |
|
272 uint32_t exidx_text_last_svma = 0; |
|
273 int exidx_text_sno = exidx_section->sh_link; |
|
274 typedef typename ElfClass::Shdr Shdr; |
|
275 // |sections| points to the section header table |
|
276 const Shdr* sections |
|
277 = GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
|
278 const int num_sections = elf_header->e_shnum; |
|
279 if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { |
|
280 const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; |
|
281 if (exidx_text_shdr->sh_size > 0) { |
|
282 exidx_text_last_svma |
|
283 = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; |
|
284 } |
|
285 } |
|
286 |
|
287 lul::ARMExToModule handler(smap, log); |
|
288 lul::ExceptionTableInfo |
|
289 parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma, |
|
290 &handler, |
|
291 reinterpret_cast<const char*>(elf_header), |
|
292 loading_addr, text_bias, log); |
|
293 parser.Start(); |
|
294 return true; |
|
295 } |
|
296 #endif /* defined(LUL_PLAT_arm_android) */ |
|
297 |
|
298 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, |
|
299 void** elf_header) { |
|
300 int obj_fd = open(obj_file.c_str(), O_RDONLY); |
|
301 if (obj_fd < 0) { |
|
302 fprintf(stderr, "Failed to open ELF file '%s': %s\n", |
|
303 obj_file.c_str(), strerror(errno)); |
|
304 return false; |
|
305 } |
|
306 FDWrapper obj_fd_wrapper(obj_fd); |
|
307 struct stat st; |
|
308 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { |
|
309 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", |
|
310 obj_file.c_str(), strerror(errno)); |
|
311 return false; |
|
312 } |
|
313 // Mapping it read-only is good enough. In any case, mapping it |
|
314 // read-write confuses Valgrind's debuginfo acquire/discard |
|
315 // heuristics, making it hard to profile the profiler. |
|
316 void *obj_base = mmap(nullptr, st.st_size, |
|
317 PROT_READ, MAP_PRIVATE, obj_fd, 0); |
|
318 if (obj_base == MAP_FAILED) { |
|
319 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", |
|
320 obj_file.c_str(), strerror(errno)); |
|
321 return false; |
|
322 } |
|
323 map_wrapper->set(obj_base, st.st_size); |
|
324 *elf_header = obj_base; |
|
325 if (!IsValidElf(*elf_header)) { |
|
326 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); |
|
327 return false; |
|
328 } |
|
329 return true; |
|
330 } |
|
331 |
|
// Report the byte order recorded in ELF_HEADER's e_ident[EI_DATA].
// Sets *BIG_ENDIAN and returns true for ELFDATA2LSB / ELFDATA2MSB; for
// any other value prints a diagnostic to stderr, leaves *BIG_ENDIAN
// alone and returns false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const unsigned char encoding = elf_header->e_ident[EI_DATA];

  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          encoding);
  return false;
}
|
349 |
|
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is stored by reference, so the caller's vector must
  // outlive this object.
  explicit LoadSymbolsInfo(const std::vector<std::string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false) {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files.  A repeated
  // name is only reported on stderr; it is not treated as an error.
  void LoadedSection(const std::string &section) {
    // insert() reports whether the name was new, so one lookup suffices.
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // Path of the debug ELF file; empty unless something has set it.
  std::string debuglink_file() const {
    return debuglink_file_;
  }

 private:
  const std::vector<std::string>& debug_dirs_;  // Directories in which to
                                                // search for the debug ELF
                                                // file.

  std::string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Initialised to false; not read anywhere in
                           // this class.

  std::set<std::string> loaded_sections_;  // Tracks the loaded ELF sections
                                           // between calls to LoadSymbols().
};
|
392 |
|
// Return the preferred loading address of the binary: the p_vaddr of
// the first PT_LOAD entry among the NHEADER program headers starting at
// PROGRAM_HEADERS, or 0 if there is no such entry.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is
  // the start address of the first PT_LOAD segment.  (ELF requires
  // the segments to be sorted by load address.)  For PIC executables
  // and dynamic libraries (e_type == ET_DYN), this address will
  // normally be zero.
  const Phdr* current = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++current) {
    if (current->p_type == PT_LOAD) {
      return current->p_vaddr;
    }
  }
  return 0;
}
|
412 |
|
413 template<typename ElfClass> |
|
414 bool LoadSymbols(const string& obj_file, |
|
415 const bool big_endian, |
|
416 const typename ElfClass::Ehdr* elf_header, |
|
417 const bool read_gnu_debug_link, |
|
418 LoadSymbolsInfo<ElfClass>* info, |
|
419 SecMap* smap, |
|
420 void* rx_avma, |
|
421 void (*log)(const char*)) { |
|
422 typedef typename ElfClass::Phdr Phdr; |
|
423 typedef typename ElfClass::Shdr Shdr; |
|
424 |
|
425 char buf[500]; |
|
426 snprintf(buf, sizeof(buf), "LoadSymbols: BEGIN %s\n", obj_file.c_str()); |
|
427 buf[sizeof(buf)-1] = 0; |
|
428 log(buf); |
|
429 |
|
430 // This is how the text bias is calculated. |
|
431 // BEGIN CALCULATE BIAS |
|
432 uintptr_t loading_addr = GetLoadingAddress<ElfClass>( |
|
433 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), |
|
434 elf_header->e_phnum); |
|
435 uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; |
|
436 snprintf(buf, sizeof(buf), |
|
437 "LoadSymbols: rx_avma=%llx, text_bias=%llx", |
|
438 (unsigned long long int)(uintptr_t)rx_avma, |
|
439 (unsigned long long int)text_bias); |
|
440 buf[sizeof(buf)-1] = 0; |
|
441 log(buf); |
|
442 // END CALCULATE BIAS |
|
443 |
|
444 const Shdr* sections = |
|
445 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
|
446 const Shdr* section_names = sections + elf_header->e_shstrndx; |
|
447 const char* names = |
|
448 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); |
|
449 const char *names_end = names + section_names->sh_size; |
|
450 bool found_usable_info = false; |
|
451 |
|
452 // Dwarf Call Frame Information (CFI) is actually independent from |
|
453 // the other DWARF debugging information, and can be used alone. |
|
454 const Shdr* dwarf_cfi_section = |
|
455 FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, |
|
456 sections, names, names_end, |
|
457 elf_header->e_shnum); |
|
458 if (dwarf_cfi_section) { |
|
459 // Ignore the return value of this function; even without call frame |
|
460 // information, the other debugging information could be perfectly |
|
461 // useful. |
|
462 info->LoadedSection(".debug_frame"); |
|
463 bool result = |
|
464 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", |
|
465 dwarf_cfi_section, false, 0, 0, big_endian, |
|
466 smap, text_bias, log); |
|
467 found_usable_info = found_usable_info || result; |
|
468 if (result) |
|
469 log("LoadSymbols: read CFI from .debug_frame"); |
|
470 } |
|
471 |
|
472 // Linux C++ exception handling information can also provide |
|
473 // unwinding data. |
|
474 const Shdr* eh_frame_section = |
|
475 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, |
|
476 sections, names, names_end, |
|
477 elf_header->e_shnum); |
|
478 if (eh_frame_section) { |
|
479 // Pointers in .eh_frame data may be relative to the base addresses of |
|
480 // certain sections. Provide those sections if present. |
|
481 const Shdr* got_section = |
|
482 FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS, |
|
483 sections, names, names_end, |
|
484 elf_header->e_shnum); |
|
485 const Shdr* text_section = |
|
486 FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS, |
|
487 sections, names, names_end, |
|
488 elf_header->e_shnum); |
|
489 info->LoadedSection(".eh_frame"); |
|
490 // As above, ignore the return value of this function. |
|
491 bool result = |
|
492 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame", |
|
493 eh_frame_section, true, |
|
494 got_section, text_section, big_endian, |
|
495 smap, text_bias, log); |
|
496 found_usable_info = found_usable_info || result; |
|
497 if (result) |
|
498 log("LoadSymbols: read CFI from .eh_frame"); |
|
499 } |
|
500 |
|
501 # if defined(LUL_PLAT_arm_android) |
|
502 // ARM has special unwind tables that can be used. .exidx is |
|
503 // always required, and .extab is normally required, but may |
|
504 // be omitted if it is empty. See comments on LoadARMexidx() |
|
505 // for more details. |
|
506 const Shdr* arm_exidx_section = |
|
507 FindElfSectionByName<ElfClass>(".ARM.exidx", SHT_ARM_EXIDX, |
|
508 sections, names, names_end, |
|
509 elf_header->e_shnum); |
|
510 const Shdr* arm_extab_section = |
|
511 FindElfSectionByName<ElfClass>(".ARM.extab", SHT_PROGBITS, |
|
512 sections, names, names_end, |
|
513 elf_header->e_shnum); |
|
514 const Shdr* debug_info_section = |
|
515 FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS, |
|
516 sections, names, names_end, |
|
517 elf_header->e_shnum); |
|
518 // Only load information from this section if there isn't a .debug_info |
|
519 // section. |
|
520 if (!debug_info_section && arm_exidx_section) { |
|
521 info->LoadedSection(".ARM.exidx"); |
|
522 if (arm_extab_section) |
|
523 info->LoadedSection(".ARM.extab"); |
|
524 bool result = LoadARMexidx<ElfClass>(elf_header, |
|
525 arm_exidx_section, arm_extab_section, |
|
526 loading_addr, text_bias, smap, log); |
|
527 found_usable_info = found_usable_info || result; |
|
528 if (result) |
|
529 log("LoadSymbols: read EXIDX from .ARM.{exidx,extab}"); |
|
530 } |
|
531 # endif /* defined(LUL_PLAT_arm_android) */ |
|
532 |
|
533 snprintf(buf, sizeof(buf), "LoadSymbols: END %s\n", obj_file.c_str()); |
|
534 buf[sizeof(buf)-1] = 0; |
|
535 log(buf); |
|
536 |
|
537 return found_usable_info; |
|
538 } |
|
539 |
|
// Return the breakpad symbol file identifier for the architecture of
// ELF_HEADER (taken from its e_machine field), or NULL if the
// architecture is not one of those listed below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct ArchName {
    Half machine;
    const char* name;
  };
  static const ArchName kKnownArchitectures[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };

  const Half arch = elf_header->e_machine;
  const size_t count =
    sizeof(kKnownArchitectures) / sizeof(kKnownArchitectures[0]);
  for (size_t i = 0; i < count; ++i) {
    if (kKnownArchitectures[i].machine == arch) {
      return kKnownArchitectures[i].name;
    }
  }
  return NULL;
}
|
559 |
|
560 // Format the Elf file identifier in IDENTIFIER as a UUID with the |
|
561 // dashes removed. |
|
562 string FormatIdentifier(unsigned char identifier[16]) { |
|
563 char identifier_str[40]; |
|
564 lul::FileID::ConvertIdentifierToString( |
|
565 identifier, |
|
566 identifier_str, |
|
567 sizeof(identifier_str)); |
|
568 string id_no_dash; |
|
569 for (int i = 0; identifier_str[i] != '\0'; ++i) |
|
570 if (identifier_str[i] != '-') |
|
571 id_no_dash += identifier_str[i]; |
|
572 // Add an extra "0" by the end. PDB files on Windows have an 'age' |
|
573 // number appended to the end of the file identifier; this isn't |
|
574 // really used or necessary on other platforms, but be consistent. |
|
575 id_no_dash += '0'; |
|
576 return id_no_dash; |
|
577 } |
|
578 |
|
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes.
//
// This is computed directly on the string rather than through
// basename(): it needs no temporary C copy (whose allocation could
// fail), and the result does not depend on which basename() variant
// the platform headers provide.  A name ending in '/' yields "".
std::string BaseFileName(const std::string &filename) {
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash == std::string::npos)
    return filename;
  return filename.substr(last_slash + 1);
}
|
588 |
|
589 template<typename ElfClass> |
|
590 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, |
|
591 const string& obj_filename, |
|
592 const vector<string>& debug_dirs, |
|
593 SecMap* smap, void* rx_avma, |
|
594 void (*log)(const char*)) { |
|
595 typedef typename ElfClass::Ehdr Ehdr; |
|
596 |
|
597 unsigned char identifier[16]; |
|
598 if (!lul |
|
599 ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { |
|
600 fprintf(stderr, "%s: unable to generate file identifier\n", |
|
601 obj_filename.c_str()); |
|
602 return false; |
|
603 } |
|
604 |
|
605 const char *architecture = ElfArchitecture<ElfClass>(elf_header); |
|
606 if (!architecture) { |
|
607 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", |
|
608 obj_filename.c_str(), elf_header->e_machine); |
|
609 return false; |
|
610 } |
|
611 |
|
612 // Figure out what endianness this file is. |
|
613 bool big_endian; |
|
614 if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) |
|
615 return false; |
|
616 |
|
617 string name = BaseFileName(obj_filename); |
|
618 string os = "Linux"; |
|
619 string id = FormatIdentifier(identifier); |
|
620 |
|
621 LoadSymbolsInfo<ElfClass> info(debug_dirs); |
|
622 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, |
|
623 !debug_dirs.empty(), &info, |
|
624 smap, rx_avma, log)) { |
|
625 const string debuglink_file = info.debuglink_file(); |
|
626 if (debuglink_file.empty()) |
|
627 return false; |
|
628 |
|
629 // Load debuglink ELF file. |
|
630 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); |
|
631 MmapWrapper debug_map_wrapper; |
|
632 Ehdr* debug_elf_header = NULL; |
|
633 if (!LoadELF(debuglink_file, &debug_map_wrapper, |
|
634 reinterpret_cast<void**>(&debug_elf_header))) |
|
635 return false; |
|
636 // Sanity checks to make sure everything matches up. |
|
637 const char *debug_architecture = |
|
638 ElfArchitecture<ElfClass>(debug_elf_header); |
|
639 if (!debug_architecture) { |
|
640 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", |
|
641 debuglink_file.c_str(), debug_elf_header->e_machine); |
|
642 return false; |
|
643 } |
|
644 if (strcmp(architecture, debug_architecture)) { |
|
645 fprintf(stderr, "%s with ELF machine architecture %s does not match " |
|
646 "%s with ELF architecture %s\n", |
|
647 debuglink_file.c_str(), debug_architecture, |
|
648 obj_filename.c_str(), architecture); |
|
649 return false; |
|
650 } |
|
651 |
|
652 bool debug_big_endian; |
|
653 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) |
|
654 return false; |
|
655 if (debug_big_endian != big_endian) { |
|
656 fprintf(stderr, "%s and %s does not match in endianness\n", |
|
657 obj_filename.c_str(), debuglink_file.c_str()); |
|
658 return false; |
|
659 } |
|
660 |
|
661 if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, |
|
662 debug_elf_header, false, &info, |
|
663 smap, rx_avma, log)) { |
|
664 return false; |
|
665 } |
|
666 } |
|
667 |
|
668 return true; |
|
669 } |
|
670 |
|
671 } // namespace (anon) |
|
672 |
|
673 |
|
674 namespace lul { |
|
675 |
|
676 bool ReadSymbolDataInternal(const uint8_t* obj_file, |
|
677 const string& obj_filename, |
|
678 const vector<string>& debug_dirs, |
|
679 SecMap* smap, void* rx_avma, |
|
680 void (*log)(const char*)) { |
|
681 |
|
682 if (!IsValidElf(obj_file)) { |
|
683 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); |
|
684 return false; |
|
685 } |
|
686 |
|
687 int elfclass = ElfClass(obj_file); |
|
688 if (elfclass == ELFCLASS32) { |
|
689 return ReadSymbolDataElfClass<ElfClass32>( |
|
690 reinterpret_cast<const Elf32_Ehdr*>(obj_file), |
|
691 obj_filename, debug_dirs, smap, rx_avma, log); |
|
692 } |
|
693 if (elfclass == ELFCLASS64) { |
|
694 return ReadSymbolDataElfClass<ElfClass64>( |
|
695 reinterpret_cast<const Elf64_Ehdr*>(obj_file), |
|
696 obj_filename, debug_dirs, smap, rx_avma, log); |
|
697 } |
|
698 |
|
699 return false; |
|
700 } |
|
701 |
|
702 bool ReadSymbolData(const string& obj_file, |
|
703 const vector<string>& debug_dirs, |
|
704 SecMap* smap, void* rx_avma, |
|
705 void (*log)(const char*)) { |
|
706 MmapWrapper map_wrapper; |
|
707 void* elf_header = NULL; |
|
708 if (!LoadELF(obj_file, &map_wrapper, &elf_header)) |
|
709 return false; |
|
710 |
|
711 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), |
|
712 obj_file, debug_dirs, smap, rx_avma, log); |
|
713 } |
|
714 |
|
715 |
|
716 namespace { |
|
717 |
|
718 template<typename ElfClass> |
|
719 void FindElfClassSection(const char *elf_base, |
|
720 const char *section_name, |
|
721 typename ElfClass::Word section_type, |
|
722 const void **section_start, |
|
723 int *section_size) { |
|
724 typedef typename ElfClass::Ehdr Ehdr; |
|
725 typedef typename ElfClass::Shdr Shdr; |
|
726 |
|
727 MOZ_ASSERT(elf_base); |
|
728 MOZ_ASSERT(section_start); |
|
729 MOZ_ASSERT(section_size); |
|
730 |
|
731 MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); |
|
732 |
|
733 const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); |
|
734 MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); |
|
735 |
|
736 const Shdr* sections = |
|
737 GetOffset<ElfClass,Shdr>(elf_header, elf_header->e_shoff); |
|
738 const Shdr* section_names = sections + elf_header->e_shstrndx; |
|
739 const char* names = |
|
740 GetOffset<ElfClass,char>(elf_header, section_names->sh_offset); |
|
741 const char *names_end = names + section_names->sh_size; |
|
742 |
|
743 const Shdr* section = |
|
744 FindElfSectionByName<ElfClass>(section_name, section_type, |
|
745 sections, names, names_end, |
|
746 elf_header->e_shnum); |
|
747 |
|
748 if (section != NULL && section->sh_size > 0) { |
|
749 *section_start = elf_base + section->sh_offset; |
|
750 *section_size = section->sh_size; |
|
751 } |
|
752 } |
|
753 |
|
754 template<typename ElfClass> |
|
755 void FindElfClassSegment(const char *elf_base, |
|
756 typename ElfClass::Word segment_type, |
|
757 const void **segment_start, |
|
758 int *segment_size) { |
|
759 typedef typename ElfClass::Ehdr Ehdr; |
|
760 typedef typename ElfClass::Phdr Phdr; |
|
761 |
|
762 MOZ_ASSERT(elf_base); |
|
763 MOZ_ASSERT(segment_start); |
|
764 MOZ_ASSERT(segment_size); |
|
765 |
|
766 MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); |
|
767 |
|
768 const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); |
|
769 MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); |
|
770 |
|
771 const Phdr* phdrs = |
|
772 GetOffset<ElfClass,Phdr>(elf_header, elf_header->e_phoff); |
|
773 |
|
774 for (int i = 0; i < elf_header->e_phnum; ++i) { |
|
775 if (phdrs[i].p_type == segment_type) { |
|
776 *segment_start = elf_base + phdrs[i].p_offset; |
|
777 *segment_size = phdrs[i].p_filesz; |
|
778 return; |
|
779 } |
|
780 } |
|
781 } |
|
782 |
|
783 } // namespace (anon) |
|
784 |
|
// Return true if the bytes at ELF_BASE begin with the ELF magic number
// (the SELFMAG bytes of ELFMAG).
bool IsValidElf(const void* elf_base) {
  const char* leading_bytes = static_cast<const char*>(elf_base);
  const int difference = strncmp(leading_bytes, ELFMAG, SELFMAG);
  return difference == 0;
}
|
789 |
|
// Return the ELF class byte (e_ident[EI_CLASS]) of the image at
// ELF_BASE.  The caller is expected to have checked that the image is
// an ELF file; no validation happens here.
int ElfClass(const void* elf_base) {
  // e_ident is the leading byte array of the ELF header, so the class
  // byte can be read directly at index EI_CLASS from the image start.
  const unsigned char* ident = static_cast<const unsigned char*>(elf_base);
  return ident[EI_CLASS];
}
|
796 |
|
797 bool FindElfSection(const void *elf_mapped_base, |
|
798 const char *section_name, |
|
799 uint32_t section_type, |
|
800 const void **section_start, |
|
801 int *section_size, |
|
802 int *elfclass) { |
|
803 MOZ_ASSERT(elf_mapped_base); |
|
804 MOZ_ASSERT(section_start); |
|
805 MOZ_ASSERT(section_size); |
|
806 |
|
807 *section_start = NULL; |
|
808 *section_size = 0; |
|
809 |
|
810 if (!IsValidElf(elf_mapped_base)) |
|
811 return false; |
|
812 |
|
813 int cls = ElfClass(elf_mapped_base); |
|
814 if (elfclass) { |
|
815 *elfclass = cls; |
|
816 } |
|
817 |
|
818 const char* elf_base = |
|
819 static_cast<const char*>(elf_mapped_base); |
|
820 |
|
821 if (cls == ELFCLASS32) { |
|
822 FindElfClassSection<ElfClass32>(elf_base, section_name, section_type, |
|
823 section_start, section_size); |
|
824 return *section_start != NULL; |
|
825 } else if (cls == ELFCLASS64) { |
|
826 FindElfClassSection<ElfClass64>(elf_base, section_name, section_type, |
|
827 section_start, section_size); |
|
828 return *section_start != NULL; |
|
829 } |
|
830 |
|
831 return false; |
|
832 } |
|
833 |
|
834 bool FindElfSegment(const void *elf_mapped_base, |
|
835 uint32_t segment_type, |
|
836 const void **segment_start, |
|
837 int *segment_size, |
|
838 int *elfclass) { |
|
839 MOZ_ASSERT(elf_mapped_base); |
|
840 MOZ_ASSERT(segment_start); |
|
841 MOZ_ASSERT(segment_size); |
|
842 |
|
843 *segment_start = NULL; |
|
844 *segment_size = 0; |
|
845 |
|
846 if (!IsValidElf(elf_mapped_base)) |
|
847 return false; |
|
848 |
|
849 int cls = ElfClass(elf_mapped_base); |
|
850 if (elfclass) { |
|
851 *elfclass = cls; |
|
852 } |
|
853 |
|
854 const char* elf_base = |
|
855 static_cast<const char*>(elf_mapped_base); |
|
856 |
|
857 if (cls == ELFCLASS32) { |
|
858 FindElfClassSegment<ElfClass32>(elf_base, segment_type, |
|
859 segment_start, segment_size); |
|
860 return *segment_start != NULL; |
|
861 } else if (cls == ELFCLASS64) { |
|
862 FindElfClassSegment<ElfClass64>(elf_base, segment_type, |
|
863 segment_start, segment_size); |
|
864 return *segment_start != NULL; |
|
865 } |
|
866 |
|
867 return false; |
|
868 } |
|
869 |
|
870 |
|
871 // (derived from) |
|
872 // file_id.cc: Return a unique identifier for a file |
|
873 // |
|
874 // See file_id.h for documentation |
|
875 // |
|
876 |
|
// ELF note name and desc fields are padded to 32-bit word boundaries:
// round |a| up to the next multiple of 4. The argument is parenthesized so
// that expressions such as `x | y` or `c ? x : y` are padded as a whole.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
|
879 |
|
880 // These functions are also used inside the crashed process, so be safe |
|
881 // and use the syscall/libc wrappers instead of direct syscalls or libc. |
|
882 |
|
883 template<typename ElfClass> |
|
884 static bool ElfClassBuildIDNoteIdentifier(const void *section, int length, |
|
885 uint8_t identifier[kMDGUIDSize]) { |
|
886 typedef typename ElfClass::Nhdr Nhdr; |
|
887 |
|
888 const void* section_end = reinterpret_cast<const char*>(section) + length; |
|
889 const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); |
|
890 while (reinterpret_cast<const void *>(note_header) < section_end) { |
|
891 if (note_header->n_type == NT_GNU_BUILD_ID) |
|
892 break; |
|
893 note_header = reinterpret_cast<const Nhdr*>( |
|
894 reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + |
|
895 NOTE_PADDING(note_header->n_namesz) + |
|
896 NOTE_PADDING(note_header->n_descsz)); |
|
897 } |
|
898 if (reinterpret_cast<const void *>(note_header) >= section_end || |
|
899 note_header->n_descsz == 0) { |
|
900 return false; |
|
901 } |
|
902 |
|
903 const char* build_id = reinterpret_cast<const char*>(note_header) + |
|
904 sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); |
|
905 // Copy as many bits of the build ID as will fit |
|
906 // into the GUID space. |
|
907 memset(identifier, 0, kMDGUIDSize); |
|
908 memcpy(identifier, build_id, |
|
909 std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); |
|
910 |
|
911 return true; |
|
912 } |
|
913 |
|
914 // Attempt to locate a .note.gnu.build-id section in an ELF binary |
|
915 // and copy as many bytes of it as will fit into |identifier|. |
|
916 static bool FindElfBuildIDNote(const void *elf_mapped_base, |
|
917 uint8_t identifier[kMDGUIDSize]) { |
|
918 void* note_section; |
|
919 int note_size, elfclass; |
|
920 if ((!FindElfSegment(elf_mapped_base, PT_NOTE, |
|
921 (const void**)¬e_section, ¬e_size, &elfclass) || |
|
922 note_size == 0) && |
|
923 (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, |
|
924 (const void**)¬e_section, ¬e_size, &elfclass) || |
|
925 note_size == 0)) { |
|
926 return false; |
|
927 } |
|
928 |
|
929 if (elfclass == ELFCLASS32) { |
|
930 return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size, |
|
931 identifier); |
|
932 } else if (elfclass == ELFCLASS64) { |
|
933 return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size, |
|
934 identifier); |
|
935 } |
|
936 |
|
937 return false; |
|
938 } |
|
939 |
|
940 // Attempt to locate the .text section of an ELF binary and generate |
|
941 // a simple hash by XORing the first page worth of bytes into |identifier|. |
|
942 static bool HashElfTextSection(const void *elf_mapped_base, |
|
943 uint8_t identifier[kMDGUIDSize]) { |
|
944 void* text_section; |
|
945 int text_size; |
|
946 if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, |
|
947 (const void**)&text_section, &text_size, NULL) || |
|
948 text_size == 0) { |
|
949 return false; |
|
950 } |
|
951 |
|
952 memset(identifier, 0, kMDGUIDSize); |
|
953 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); |
|
954 const uint8_t* ptr_end = ptr + std::min(text_size, 4096); |
|
955 while (ptr < ptr_end) { |
|
956 for (unsigned i = 0; i < kMDGUIDSize; i++) |
|
957 identifier[i] ^= ptr[i]; |
|
958 ptr += kMDGUIDSize; |
|
959 } |
|
960 return true; |
|
961 } |
|
962 |
|
963 // static |
|
964 bool FileID::ElfFileIdentifierFromMappedFile(const void* base, |
|
965 uint8_t identifier[kMDGUIDSize]) { |
|
966 // Look for a build id note first. |
|
967 if (FindElfBuildIDNote(base, identifier)) |
|
968 return true; |
|
969 |
|
970 // Fall back on hashing the first page of the text section. |
|
971 return HashElfTextSection(base, identifier); |
|
972 } |
|
973 |
|
974 // static |
|
975 void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], |
|
976 char* buffer, int buffer_length) { |
|
977 uint8_t identifier_swapped[kMDGUIDSize]; |
|
978 |
|
979 // Endian-ness swap to match dump processor expectation. |
|
980 memcpy(identifier_swapped, identifier, kMDGUIDSize); |
|
981 uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); |
|
982 *data1 = htonl(*data1); |
|
983 uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); |
|
984 *data2 = htons(*data2); |
|
985 uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); |
|
986 *data3 = htons(*data3); |
|
987 |
|
988 int buffer_idx = 0; |
|
989 for (unsigned int idx = 0; |
|
990 (buffer_idx < buffer_length) && (idx < kMDGUIDSize); |
|
991 ++idx) { |
|
992 int hi = (identifier_swapped[idx] >> 4) & 0x0F; |
|
993 int lo = (identifier_swapped[idx]) & 0x0F; |
|
994 |
|
995 if (idx == 4 || idx == 6 || idx == 8 || idx == 10) |
|
996 buffer[buffer_idx++] = '-'; |
|
997 |
|
998 buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; |
|
999 buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; |
|
1000 } |
|
1001 |
|
1002 // NULL terminate |
|
1003 buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; |
|
1004 } |
|
1005 |
|
1006 } // namespace lul |