|
1 // Copyright (c) 2011 Google Inc. |
|
2 // All rights reserved. |
|
3 // |
|
4 // Redistribution and use in source and binary forms, with or without |
|
5 // modification, are permitted provided that the following conditions are |
|
6 // met: |
|
7 // |
|
8 // * Redistributions of source code must retain the above copyright |
|
9 // notice, this list of conditions and the following disclaimer. |
|
10 // * Redistributions in binary form must reproduce the above |
|
11 // copyright notice, this list of conditions and the following disclaimer |
|
12 // in the documentation and/or other materials provided with the |
|
13 // distribution. |
|
14 // * Neither the name of Google Inc. nor the names of its |
|
15 // contributors may be used to endorse or promote products derived from |
|
16 // this software without specific prior written permission. |
|
17 // |
|
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 |
|
30 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
|
31 |
|
32 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile: |
|
33 // Find all the debugging info in a file and dump it as a Breakpad symbol file. |
|
34 |
|
35 #include "common/linux/dump_symbols.h" |
|
36 |
|
37 #include <assert.h> |
|
38 #include <elf.h> |
|
39 #include <errno.h> |
|
40 #include <fcntl.h> |
|
41 #include <link.h> |
|
42 #include <stdio.h> |
|
43 #include <stdlib.h> |
|
44 #include <string.h> |
|
45 #include <sys/mman.h> |
|
46 #include <sys/stat.h> |
|
47 #include <unistd.h> |
|
48 |
|
49 #include <iostream> |
|
50 #include <set> |
|
51 #include <string> |
|
52 #include <utility> |
|
53 #include <vector> |
|
54 |
|
55 #include "common/arm_ex_reader.h" |
|
56 #include "common/dwarf/bytereader-inl.h" |
|
57 #include "common/dwarf/dwarf2diehandler.h" |
|
58 #include "common/dwarf_cfi_to_module.h" |
|
59 #include "common/dwarf_cu_to_module.h" |
|
60 #include "common/dwarf_line_to_module.h" |
|
61 #include "common/linux/elfutils.h" |
|
62 #include "common/linux/elfutils-inl.h" |
|
63 #include "common/linux/elf_symbols_to_module.h" |
|
64 #include "common/linux/file_id.h" |
|
65 #include "common/module.h" |
|
66 #include "common/scoped_ptr.h" |
|
67 #ifndef NO_STABS_SUPPORT |
|
68 #include "common/stabs_reader.h" |
|
69 #include "common/stabs_to_module.h" |
|
70 #endif |
|
71 #include "common/using_std_string.h" |
|
72 #include "common/logging.h" |
|
73 |
|
74 #ifndef SHT_ARM_EXIDX |
|
75 // bionic and older glibc don't define it |
|
76 # define SHT_ARM_EXIDX (SHT_LOPROC + 1) |
|
77 #endif |
|
78 |
|
79 // This namespace contains helper functions. |
|
80 namespace { |
|
81 |
|
82 using google_breakpad::DwarfCFIToModule; |
|
83 using google_breakpad::DwarfCUToModule; |
|
84 using google_breakpad::DwarfLineToModule; |
|
85 using google_breakpad::ElfClass; |
|
86 using google_breakpad::ElfClass32; |
|
87 using google_breakpad::ElfClass64; |
|
88 using google_breakpad::FindElfSectionByName; |
|
89 using google_breakpad::GetOffset; |
|
90 using google_breakpad::IsValidElf; |
|
91 using google_breakpad::Module; |
|
92 #ifndef NO_STABS_SUPPORT |
|
93 using google_breakpad::StabsToModule; |
|
94 #endif |
|
95 using google_breakpad::UniqueString; |
|
96 using google_breakpad::scoped_ptr; |
|
97 |
|
//
// FDWrapper
//
// Owns a file descriptor and close()s it when the wrapper is destroyed,
// unless ownership was given up with release(). The value -1 means "no
// descriptor owned". Copying is disabled, because two copies would both
// close the same descriptor.
//
class FDWrapper {
 public:
  // Take ownership of FD; pass -1 for an empty wrapper.
  explicit FDWrapper(int fd) : fd_(fd) {}

  // Close the descriptor if one is still owned.
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Return the owned descriptor (or -1) without giving up ownership.
  int get() const {
    return fd_;
  }

  // Give up ownership: return the descriptor and leave the wrapper empty
  // so that the destructor does not close it.
  int release() {
    const int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // Declared but intentionally left undefined to forbid copying.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
|
122 |
|
//
// MmapWrapper
//
// Remembers one mapped region and munmap()s it when the wrapper is
// destroyed, unless release() was called first. Copying is disabled,
// because two copies would both unmap the same region.
//
class MmapWrapper {
 public:
  // Start out empty; every member gets a defined value.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  // Unmap the recorded region, if any.
  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Record the region [MAPPED_ADDRESS, MAPPED_ADDRESS + MAPPED_SIZE) as
  // owned by this wrapper. A previously recorded region is simply
  // forgotten, not unmapped.
  void set(void *mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Forget the recorded region without unmapping it. Must only be called
  // after set().
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Declared but intentionally left undefined to forbid copying.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;   // True between set() and release().
  void *base_;    // Start of the recorded region.
  size_t size_;   // Length of the recorded region in bytes.
};
|
154 |
|
// Return the preferred loading address of the binary described by the
// NHEADER program headers starting at PROGRAM_HEADERS: the virtual address
// of the first PT_LOAD segment whose file offset is zero. If there is no
// such segment, return 0.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  const Phdr* segment = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
    if (segment->p_type != PT_LOAD)
      continue;
    // The loadable segment that maps the start of the file gives the base.
    if (segment->p_offset == 0)
      return segment->p_vaddr;
  }
  // No PT_LOAD segment at offset zero was found.
  return 0;
}
|
172 |
|
173 #ifndef NO_STABS_SUPPORT |
|
174 template<typename ElfClass> |
|
175 bool LoadStabs(const typename ElfClass::Ehdr* elf_header, |
|
176 const typename ElfClass::Shdr* stab_section, |
|
177 const typename ElfClass::Shdr* stabstr_section, |
|
178 const bool big_endian, |
|
179 Module* module) { |
|
180 // A callback object to handle data from the STABS reader. |
|
181 StabsToModule handler(module); |
|
182 // Find the addresses of the STABS data, and create a STABS reader object. |
|
183 // On Linux, STABS entries always have 32-bit values, regardless of the |
|
184 // address size of the architecture whose code they're describing, and |
|
185 // the strings are always "unitized". |
|
186 const uint8_t* stabs = |
|
187 GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset); |
|
188 const uint8_t* stabstr = |
|
189 GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset); |
|
190 google_breakpad::StabsReader reader(stabs, stab_section->sh_size, |
|
191 stabstr, stabstr_section->sh_size, |
|
192 big_endian, 4, true, &handler); |
|
193 // Read the STABS data, and do post-processing. |
|
194 if (!reader.Process()) |
|
195 return false; |
|
196 handler.Finalize(); |
|
197 return true; |
|
198 } |
|
199 #endif // NO_STABS_SUPPORT |
|
200 |
|
201 // A line-to-module loader that accepts line number info parsed by |
|
202 // dwarf2reader::LineInfo and populates a Module and a line vector |
|
203 // with the results. |
|
204 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler { |
|
205 public: |
|
206 // Create a line-to-module converter using BYTE_READER. |
|
207 explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader) |
|
208 : byte_reader_(byte_reader) { } |
|
209 void StartCompilationUnit(const string& compilation_dir) { |
|
210 compilation_dir_ = compilation_dir; |
|
211 } |
|
212 void ReadProgram(const char *program, uint64 length, |
|
213 Module *module, std::vector<Module::Line> *lines) { |
|
214 DwarfLineToModule handler(module, compilation_dir_, lines); |
|
215 dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler); |
|
216 parser.Start(); |
|
217 } |
|
218 private: |
|
219 string compilation_dir_; |
|
220 dwarf2reader::ByteReader *byte_reader_; |
|
221 }; |
|
222 |
|
223 template<typename ElfClass> |
|
224 bool LoadDwarf(const string& dwarf_filename, |
|
225 const typename ElfClass::Ehdr* elf_header, |
|
226 const bool big_endian, |
|
227 Module* module) { |
|
228 typedef typename ElfClass::Shdr Shdr; |
|
229 |
|
230 const dwarf2reader::Endianness endianness = big_endian ? |
|
231 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; |
|
232 dwarf2reader::ByteReader byte_reader(endianness); |
|
233 |
|
234 // Construct a context for this file. |
|
235 DwarfCUToModule::FileContext file_context(dwarf_filename, module); |
|
236 |
|
237 // Build a map of the ELF file's sections. |
|
238 const Shdr* sections = |
|
239 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
|
240 int num_sections = elf_header->e_shnum; |
|
241 const Shdr* section_names = sections + elf_header->e_shstrndx; |
|
242 for (int i = 0; i < num_sections; i++) { |
|
243 const Shdr* section = §ions[i]; |
|
244 string name = GetOffset<ElfClass, char>(elf_header, |
|
245 section_names->sh_offset) + |
|
246 section->sh_name; |
|
247 const char* contents = GetOffset<ElfClass, char>(elf_header, |
|
248 section->sh_offset); |
|
249 uint64 length = section->sh_size; |
|
250 file_context.section_map[name] = std::make_pair(contents, length); |
|
251 } |
|
252 |
|
253 // Parse all the compilation units in the .debug_info section. |
|
254 DumperLineToModule line_to_module(&byte_reader); |
|
255 std::pair<const char *, uint64> debug_info_section |
|
256 = file_context.section_map[".debug_info"]; |
|
257 // This should never have been called if the file doesn't have a |
|
258 // .debug_info section. |
|
259 assert(debug_info_section.first); |
|
260 uint64 debug_info_length = debug_info_section.second; |
|
261 for (uint64 offset = 0; offset < debug_info_length;) { |
|
262 // Make a handler for the root DIE that populates MODULE with the |
|
263 // data that was found. |
|
264 DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset); |
|
265 DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter); |
|
266 // Make a Dwarf2Handler that drives the DIEHandler. |
|
267 dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); |
|
268 // Make a DWARF parser for the compilation unit at OFFSET. |
|
269 dwarf2reader::CompilationUnit reader(file_context.section_map, |
|
270 offset, |
|
271 &byte_reader, |
|
272 &die_dispatcher); |
|
273 // Process the entire compilation unit; get the offset of the next. |
|
274 offset += reader.Start(); |
|
275 } |
|
276 return true; |
|
277 } |
|
278 |
|
279 // Fill REGISTER_NAMES with the register names appropriate to the |
|
280 // machine architecture given in HEADER, indexed by the register |
|
281 // numbers used in DWARF call frame information. Return true on |
|
282 // success, or false if HEADER's machine architecture is not |
|
283 // supported. |
|
284 template<typename ElfClass> |
|
285 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, |
|
286 std::vector<const UniqueString*>* register_names) { |
|
287 switch (elf_header->e_machine) { |
|
288 case EM_386: |
|
289 *register_names = DwarfCFIToModule::RegisterNames::I386(); |
|
290 return true; |
|
291 case EM_ARM: |
|
292 *register_names = DwarfCFIToModule::RegisterNames::ARM(); |
|
293 return true; |
|
294 case EM_X86_64: |
|
295 *register_names = DwarfCFIToModule::RegisterNames::X86_64(); |
|
296 return true; |
|
297 default: |
|
298 return false; |
|
299 } |
|
300 } |
|
301 |
|
302 template<typename ElfClass> |
|
303 bool LoadDwarfCFI(const string& dwarf_filename, |
|
304 const typename ElfClass::Ehdr* elf_header, |
|
305 const char* section_name, |
|
306 const typename ElfClass::Shdr* section, |
|
307 const bool eh_frame, |
|
308 const typename ElfClass::Shdr* got_section, |
|
309 const typename ElfClass::Shdr* text_section, |
|
310 const bool big_endian, |
|
311 Module* module) { |
|
312 // Find the appropriate set of register names for this file's |
|
313 // architecture. |
|
314 std::vector<const UniqueString*> register_names; |
|
315 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, ®ister_names)) { |
|
316 fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" |
|
317 " cannot convert DWARF call frame information\n", |
|
318 dwarf_filename.c_str(), elf_header->e_machine); |
|
319 return false; |
|
320 } |
|
321 |
|
322 const dwarf2reader::Endianness endianness = big_endian ? |
|
323 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; |
|
324 |
|
325 // Find the call frame information and its size. |
|
326 const char* cfi = |
|
327 GetOffset<ElfClass, char>(elf_header, section->sh_offset); |
|
328 size_t cfi_size = section->sh_size; |
|
329 |
|
330 // Plug together the parser, handler, and their entourages. |
|
331 DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name); |
|
332 DwarfCFIToModule handler(module, register_names, &module_reporter); |
|
333 dwarf2reader::ByteReader byte_reader(endianness); |
|
334 |
|
335 byte_reader.SetAddressSize(ElfClass::kAddrSize); |
|
336 |
|
337 // Provide the base addresses for .eh_frame encoded pointers, if |
|
338 // possible. |
|
339 byte_reader.SetCFIDataBase(section->sh_addr, cfi); |
|
340 if (got_section) |
|
341 byte_reader.SetDataBase(got_section->sh_addr); |
|
342 if (text_section) |
|
343 byte_reader.SetTextBase(text_section->sh_addr); |
|
344 |
|
345 dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename, |
|
346 section_name); |
|
347 dwarf2reader::CallFrameInfo parser(cfi, cfi_size, |
|
348 &byte_reader, &handler, &dwarf_reporter, |
|
349 eh_frame); |
|
350 parser.Start(); |
|
351 return true; |
|
352 } |
|
353 |
|
354 template<typename ElfClass> |
|
355 bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, |
|
356 const typename ElfClass::Shdr* exidx_section, |
|
357 const typename ElfClass::Shdr* extab_section, |
|
358 uint32_t loading_addr, |
|
359 Module* module) { |
|
360 // To do this properly we need to know: |
|
361 // * the bounds of the .ARM.exidx section in the mapped image |
|
362 // * the bounds of the .ARM.extab section in the mapped image |
|
363 // * the vma of the last byte in the text section associated with the .exidx |
|
364 // The first two are easy. The third is a bit tricky. If we can't |
|
365 // figure out what it is, just pass in zero. |
|
366 const char *exidx_img |
|
367 = GetOffset<ElfClass, char>(elf_header, exidx_section->sh_offset); |
|
368 size_t exidx_size = exidx_section->sh_size; |
|
369 const char *extab_img |
|
370 = GetOffset<ElfClass, char>(elf_header, extab_section->sh_offset); |
|
371 size_t extab_size = extab_section->sh_size; |
|
372 |
|
373 // The sh_link field of the exidx section gives the section number |
|
374 // for the associated text section. |
|
375 uint32_t exidx_text_last_svma = 0; |
|
376 int exidx_text_sno = exidx_section->sh_link; |
|
377 typedef typename ElfClass::Shdr Shdr; |
|
378 // |sections| points to the section header table |
|
379 const Shdr* sections |
|
380 = GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
|
381 const int num_sections = elf_header->e_shnum; |
|
382 if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { |
|
383 const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; |
|
384 if (exidx_text_shdr->sh_size > 0) { |
|
385 exidx_text_last_svma |
|
386 = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; |
|
387 } |
|
388 } |
|
389 |
|
390 arm_ex_to_module::ARMExToModule handler(module); |
|
391 arm_ex_reader::ExceptionTableInfo |
|
392 parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma, |
|
393 &handler, |
|
394 reinterpret_cast<const char*>(elf_header), |
|
395 loading_addr); |
|
396 parser.Start(); |
|
397 return true; |
|
398 } |
|
399 |
|
400 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, |
|
401 void** elf_header) { |
|
402 int obj_fd = open(obj_file.c_str(), O_RDONLY); |
|
403 if (obj_fd < 0) { |
|
404 fprintf(stderr, "Failed to open ELF file '%s': %s\n", |
|
405 obj_file.c_str(), strerror(errno)); |
|
406 return false; |
|
407 } |
|
408 FDWrapper obj_fd_wrapper(obj_fd); |
|
409 struct stat st; |
|
410 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { |
|
411 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", |
|
412 obj_file.c_str(), strerror(errno)); |
|
413 return false; |
|
414 } |
|
415 void *obj_base = mmap(NULL, st.st_size, |
|
416 PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0); |
|
417 if (obj_base == MAP_FAILED) { |
|
418 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", |
|
419 obj_file.c_str(), strerror(errno)); |
|
420 return false; |
|
421 } |
|
422 map_wrapper->set(obj_base, st.st_size); |
|
423 *elf_header = obj_base; |
|
424 if (!IsValidElf(*elf_header)) { |
|
425 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); |
|
426 return false; |
|
427 } |
|
428 return true; |
|
429 } |
|
430 |
|
// Determine the byte order of the ELF image at ELF_HEADER from the
// EI_DATA byte of its identification. On success, set *BIG_ENDIAN and
// return true. If the byte is neither ELFDATA2LSB nor ELFDATA2MSB, print
// a message to stderr and return false without touching *BIG_ENDIAN.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const int encoding = elf_header->e_ident[EI_DATA];
  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
  return false;
}
|
448 |
|
// Read the .gnu_debuglink section contents at DEBUGLINK (DEBUGLINK_SIZE
// bytes) and look for the debug file it names in each of DEBUG_DIRS, in
// order. Return the path "<dir>/<name>" of the first candidate that can be
// opened for reading. If the section contents are malformed or no
// candidate can be opened, print a message to stderr and return an empty
// string. OBJ_FILE is only used in messages.
template<typename ElfClass>
string ReadDebugLink(const char* debuglink,
                     size_t debuglink_size,
                     const string& obj_file,
                     const std::vector<string>& debug_dirs) {
  // The section holds a NUL-terminated file name, padding up to a multiple
  // of four bytes, and a 4-byte CRC32. Bound the scan by the section size
  // so that a section without a terminator cannot make us read past it;
  // such a section then fails the size check below.
  size_t debuglink_len = strnlen(debuglink, debuglink_size) + 5;  // '\0' + CRC32.
  debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.

  // Sanity check.
  if (debuglink_len != debuglink_size) {
    fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
            "%zx %zx\n", debuglink_len, debuglink_size);
    return "";
  }

  typedef std::vector<string>::const_iterator DirIterator;
  for (DirIterator it = debug_dirs.begin(); it != debug_dirs.end(); ++it) {
    const string debuglink_path = *it + "/" + debuglink;
    const int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
    if (debuglink_fd >= 0) {
      // TODO(thestig) check the CRC-32 at the end of the .gnu_debuglink
      // section.
      // The file was opened only to prove that it is readable.
      close(debuglink_fd);
      return debuglink_path;
    }
  }

  fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
          obj_file.c_str());
  for (DirIterator it = debug_dirs.begin(); it != debug_dirs.end(); ++it) {
    const string& debug_dir = *it;
    fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink);
  }
  return "";
}
|
496 |
|
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
// Only a reference to the directory list is kept, so the vector given to
// the constructor must outlive this object.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
      debug_dirs_(dbg_dirs),
      has_loading_addr_(false),
      loading_addr_() {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. A repeated
  // section name is reported on stderr.
  void LoadedSection(const string &section) {
    // insert() reports through .second whether the name was new.
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records ADDR and FILENAME; each later
  // call checks ADDR against the recorded address and, on a mismatch,
  // reports both file names on stderr and asserts.
  void set_loading_addr(Addr addr, const string &filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded so that later calls are compared
      // against it instead of overwriting it.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_;  // Directories in which to
                                           // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
|
571 |
|
572 template<typename ElfClass> |
|
573 bool LoadSymbols(const string& obj_file, |
|
574 const bool big_endian, |
|
575 const typename ElfClass::Ehdr* elf_header, |
|
576 const bool read_gnu_debug_link, |
|
577 LoadSymbolsInfo<ElfClass>* info, |
|
578 SymbolData symbol_data, |
|
579 Module* module) { |
|
580 typedef typename ElfClass::Addr Addr; |
|
581 typedef typename ElfClass::Phdr Phdr; |
|
582 typedef typename ElfClass::Shdr Shdr; |
|
583 |
|
584 BPLOG(INFO) << ""; |
|
585 BPLOG(INFO) << "LoadSymbols: BEGIN " << obj_file; |
|
586 |
|
587 Addr loading_addr = GetLoadingAddress<ElfClass>( |
|
588 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), |
|
589 elf_header->e_phnum); |
|
590 module->SetLoadAddress(loading_addr); |
|
591 info->set_loading_addr(loading_addr, obj_file); |
|
592 |
|
593 const Shdr* sections = |
|
594 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
|
595 const Shdr* section_names = sections + elf_header->e_shstrndx; |
|
596 const char* names = |
|
597 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); |
|
598 const char *names_end = names + section_names->sh_size; |
|
599 bool found_debug_info_section = false; |
|
600 bool found_usable_info = false; |
|
601 |
|
602 if (symbol_data != ONLY_CFI) { |
|
603 #ifndef NO_STABS_SUPPORT |
|
604 // Look for STABS debugging information, and load it if present. |
|
605 const Shdr* stab_section = |
|
606 FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS, |
|
607 sections, names, names_end, |
|
608 elf_header->e_shnum); |
|
609 if (stab_section) { |
|
610 const Shdr* stabstr_section = stab_section->sh_link + sections; |
|
611 if (stabstr_section) { |
|
612 found_debug_info_section = true; |
|
613 found_usable_info = true; |
|
614 info->LoadedSection(".stab"); |
|
615 if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section, |
|
616 big_endian, module)) { |
|
617 fprintf(stderr, "%s: \".stab\" section found, but failed to load" |
|
618 " STABS debugging information\n", obj_file.c_str()); |
|
619 } |
|
620 } |
|
621 } |
|
622 #endif // NO_STABS_SUPPORT |
|
623 |
|
624 // Look for DWARF debugging information, and load it if present. |
|
625 const Shdr* dwarf_section = |
|
626 FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS, |
|
627 sections, names, names_end, |
|
628 elf_header->e_shnum); |
|
629 if (dwarf_section) { |
|
630 found_debug_info_section = true; |
|
631 found_usable_info = true; |
|
632 info->LoadedSection(".debug_info"); |
|
633 if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian, module)) |
|
634 fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " |
|
635 "DWARF debugging information\n", obj_file.c_str()); |
|
636 } |
|
637 } |
|
638 |
|
639 if (symbol_data != NO_CFI) { |
|
640 // Dwarf Call Frame Information (CFI) is actually independent from |
|
641 // the other DWARF debugging information, and can be used alone. |
|
642 const Shdr* dwarf_cfi_section = |
|
643 FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, |
|
644 sections, names, names_end, |
|
645 elf_header->e_shnum); |
|
646 if (dwarf_cfi_section) { |
|
647 // Ignore the return value of this function; even without call frame |
|
648 // information, the other debugging information could be perfectly |
|
649 // useful. |
|
650 info->LoadedSection(".debug_frame"); |
|
651 bool result = |
|
652 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", |
|
653 dwarf_cfi_section, false, 0, 0, big_endian, |
|
654 module); |
|
655 found_usable_info = found_usable_info || result; |
|
656 if (result) |
|
657 BPLOG(INFO) << "LoadSymbols: read CFI from .debug_frame"; |
|
658 } |
|
659 |
|
660 // Linux C++ exception handling information can also provide |
|
661 // unwinding data. |
|
662 const Shdr* eh_frame_section = |
|
663 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, |
|
664 sections, names, names_end, |
|
665 elf_header->e_shnum); |
|
666 if (eh_frame_section) { |
|
667 // Pointers in .eh_frame data may be relative to the base addresses of |
|
668 // certain sections. Provide those sections if present. |
|
669 const Shdr* got_section = |
|
670 FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS, |
|
671 sections, names, names_end, |
|
672 elf_header->e_shnum); |
|
673 const Shdr* text_section = |
|
674 FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS, |
|
675 sections, names, names_end, |
|
676 elf_header->e_shnum); |
|
677 info->LoadedSection(".eh_frame"); |
|
678 // As above, ignore the return value of this function. |
|
679 bool result = |
|
680 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame", |
|
681 eh_frame_section, true, |
|
682 got_section, text_section, big_endian, module); |
|
683 found_usable_info = found_usable_info || result; |
|
684 if (result) |
|
685 BPLOG(INFO) << "LoadSymbols: read CFI from .eh_frame"; |
|
686 } |
|
687 } |
|
688 |
|
689 // ARM has special unwind tables that can be used. |
|
690 const Shdr* arm_exidx_section = |
|
691 FindElfSectionByName<ElfClass>(".ARM.exidx", SHT_ARM_EXIDX, |
|
692 sections, names, names_end, |
|
693 elf_header->e_shnum); |
|
694 const Shdr* arm_extab_section = |
|
695 FindElfSectionByName<ElfClass>(".ARM.extab", SHT_PROGBITS, |
|
696 sections, names, names_end, |
|
697 elf_header->e_shnum); |
|
698 // Only load information from this section if there isn't a .debug_info |
|
699 // section. |
|
700 if (!found_debug_info_section |
|
701 && arm_exidx_section && arm_extab_section && symbol_data != NO_CFI) { |
|
702 info->LoadedSection(".ARM.exidx"); |
|
703 info->LoadedSection(".ARM.extab"); |
|
704 bool result = LoadARMexidx<ElfClass>(elf_header, |
|
705 arm_exidx_section, arm_extab_section, |
|
706 loading_addr, module); |
|
707 found_usable_info = found_usable_info || result; |
|
708 if (result) |
|
709 BPLOG(INFO) << "LoadSymbols: read EXIDX from .ARM.{exidx,extab}"; |
|
710 } |
|
711 |
|
712 if (!found_debug_info_section && symbol_data != ONLY_CFI) { |
|
713 fprintf(stderr, "%s: file contains no debugging information" |
|
714 " (no \".stab\" or \".debug_info\" sections)\n", |
|
715 obj_file.c_str()); |
|
716 |
|
717 // Failed, but maybe there's a .gnu_debuglink section? |
|
718 if (read_gnu_debug_link) { |
|
719 const Shdr* gnu_debuglink_section |
|
720 = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS, |
|
721 sections, names, |
|
722 names_end, elf_header->e_shnum); |
|
723 if (gnu_debuglink_section) { |
|
724 if (!info->debug_dirs().empty()) { |
|
725 const char* debuglink_contents = |
|
726 GetOffset<ElfClass, char>(elf_header, |
|
727 gnu_debuglink_section->sh_offset); |
|
728 string debuglink_file |
|
729 = ReadDebugLink<ElfClass>(debuglink_contents, |
|
730 gnu_debuglink_section->sh_size, |
|
731 obj_file, info->debug_dirs()); |
|
732 info->set_debuglink_file(debuglink_file); |
|
733 } else { |
|
734 fprintf(stderr, ".gnu_debuglink section found in '%s', " |
|
735 "but no debug path specified.\n", obj_file.c_str()); |
|
736 } |
|
737 } else { |
|
738 fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n", |
|
739 obj_file.c_str()); |
|
740 } |
|
741 } else { |
|
742 if (symbol_data != ONLY_CFI) { |
|
743 // The caller doesn't want to consult .gnu_debuglink. |
|
744 // See if there are export symbols available. |
|
745 const Shdr* dynsym_section = |
|
746 FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM, |
|
747 sections, names, names_end, |
|
748 elf_header->e_shnum); |
|
749 const Shdr* dynstr_section = |
|
750 FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB, |
|
751 sections, names, names_end, |
|
752 elf_header->e_shnum); |
|
753 if (dynsym_section && dynstr_section) { |
|
754 info->LoadedSection(".dynsym"); |
|
755 |
|
756 const uint8_t* dynsyms = |
|
757 GetOffset<ElfClass, uint8_t>(elf_header, |
|
758 dynsym_section->sh_offset); |
|
759 const uint8_t* dynstrs = |
|
760 GetOffset<ElfClass, uint8_t>(elf_header, |
|
761 dynstr_section->sh_offset); |
|
762 bool result = |
|
763 ELFSymbolsToModule(dynsyms, |
|
764 dynsym_section->sh_size, |
|
765 dynstrs, |
|
766 dynstr_section->sh_size, |
|
767 big_endian, |
|
768 ElfClass::kAddrSize, |
|
769 module); |
|
770 found_usable_info = found_usable_info || result; |
|
771 } |
|
772 } |
|
773 |
|
774 // Return true if some usable information was found, since |
|
775 // the caller doesn't want to use .gnu_debuglink. |
|
776 BPLOG(INFO) << "LoadSymbols: " |
|
777 << (found_usable_info ? "SUCCESS " : "FAILURE ") |
|
778 << obj_file; |
|
779 return found_usable_info; |
|
780 } |
|
781 |
|
782 // No debug info was found, let the user try again with .gnu_debuglink |
|
783 // if present. |
|
784 BPLOG(INFO) << "LoadSymbols: FAILURE " << obj_file; |
|
785 return false; |
|
786 } |
|
787 |
|
788 BPLOG(INFO) << "LoadSymbols: SUCCESS " << obj_file; |
|
789 return true; |
|
790 } |
|
791 |
|
// Return the breakpad symbol file identifier for the machine architecture
// recorded in ELF_HEADER's e_machine field, or NULL if the architecture is
// not one of those listed below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // ELF machine number -> breakpad architecture name.
  static const struct {
    int machine;
    const char* name;
  } kArchitectures[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };

  const Half arch = elf_header->e_machine;
  const size_t count = sizeof(kArchitectures) / sizeof(kArchitectures[0]);
  for (size_t i = 0; i < count; ++i) {
    if (kArchitectures[i].machine == arch)
      return kArchitectures[i].name;
  }
  return NULL;
}
|
811 |
|
812 // Format the Elf file identifier in IDENTIFIER as a UUID with the |
|
813 // dashes removed. |
|
814 string FormatIdentifier(unsigned char identifier[16]) { |
|
815 char identifier_str[40]; |
|
816 google_breakpad::FileID::ConvertIdentifierToString( |
|
817 identifier, |
|
818 identifier_str, |
|
819 sizeof(identifier_str)); |
|
820 string id_no_dash; |
|
821 for (int i = 0; identifier_str[i] != '\0'; ++i) |
|
822 if (identifier_str[i] != '-') |
|
823 id_no_dash += identifier_str[i]; |
|
824 // Add an extra "0" by the end. PDB files on Windows have an 'age' |
|
825 // number appended to the end of the file identifier; this isn't |
|
826 // really used or necessary on other platforms, but be consistent. |
|
827 id_no_dash += '0'; |
|
828 return id_no_dash; |
|
829 } |
|
830 |
|
// Return the non-directory portion of FILENAME as reported by
// basename(): the portion after the last slash, or the whole filename
// if there are no slashes.
string BaseFileName(const string &filename) {
  // basename() takes a mutable C string and is allowed to modify it,
  // so give it a private NUL-terminated copy.  The vector owns that
  // copy, which avoids passing an unchecked strdup() result (possibly
  // NULL) to basename() and guarantees the buffer is released even if
  // building the returned string throws.
  std::vector<char> path_copy(filename.c_str(),
                              filename.c_str() + filename.size() + 1);
  return string(basename(&path_copy[0]));
}
|
840 |
|
841 template<typename ElfClass> |
|
842 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, |
|
843 const string& obj_filename, |
|
844 const std::vector<string>& debug_dirs, |
|
845 SymbolData symbol_data, |
|
846 Module** out_module) { |
|
847 typedef typename ElfClass::Ehdr Ehdr; |
|
848 typedef typename ElfClass::Shdr Shdr; |
|
849 |
|
850 *out_module = NULL; |
|
851 |
|
852 unsigned char identifier[16]; |
|
853 if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header, |
|
854 identifier)) { |
|
855 fprintf(stderr, "%s: unable to generate file identifier\n", |
|
856 obj_filename.c_str()); |
|
857 return false; |
|
858 } |
|
859 |
|
860 const char *architecture = ElfArchitecture<ElfClass>(elf_header); |
|
861 if (!architecture) { |
|
862 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", |
|
863 obj_filename.c_str(), elf_header->e_machine); |
|
864 return false; |
|
865 } |
|
866 |
|
867 // Figure out what endianness this file is. |
|
868 bool big_endian; |
|
869 if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) |
|
870 return false; |
|
871 |
|
872 string name = BaseFileName(obj_filename); |
|
873 string os = "Linux"; |
|
874 string id = FormatIdentifier(identifier); |
|
875 |
|
876 LoadSymbolsInfo<ElfClass> info(debug_dirs); |
|
877 scoped_ptr<Module> module(new Module(name, os, architecture, id)); |
|
878 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, |
|
879 !debug_dirs.empty(), &info, |
|
880 symbol_data, module.get())) { |
|
881 const string debuglink_file = info.debuglink_file(); |
|
882 if (debuglink_file.empty()) |
|
883 return false; |
|
884 |
|
885 // Load debuglink ELF file. |
|
886 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); |
|
887 MmapWrapper debug_map_wrapper; |
|
888 Ehdr* debug_elf_header = NULL; |
|
889 if (!LoadELF(debuglink_file, &debug_map_wrapper, |
|
890 reinterpret_cast<void**>(&debug_elf_header))) |
|
891 return false; |
|
892 // Sanity checks to make sure everything matches up. |
|
893 const char *debug_architecture = |
|
894 ElfArchitecture<ElfClass>(debug_elf_header); |
|
895 if (!debug_architecture) { |
|
896 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", |
|
897 debuglink_file.c_str(), debug_elf_header->e_machine); |
|
898 return false; |
|
899 } |
|
900 if (strcmp(architecture, debug_architecture)) { |
|
901 fprintf(stderr, "%s with ELF machine architecture %s does not match " |
|
902 "%s with ELF architecture %s\n", |
|
903 debuglink_file.c_str(), debug_architecture, |
|
904 obj_filename.c_str(), architecture); |
|
905 return false; |
|
906 } |
|
907 |
|
908 bool debug_big_endian; |
|
909 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) |
|
910 return false; |
|
911 if (debug_big_endian != big_endian) { |
|
912 fprintf(stderr, "%s and %s does not match in endianness\n", |
|
913 obj_filename.c_str(), debuglink_file.c_str()); |
|
914 return false; |
|
915 } |
|
916 |
|
917 if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, |
|
918 debug_elf_header, false, &info, |
|
919 symbol_data, module.get())) { |
|
920 return false; |
|
921 } |
|
922 } |
|
923 |
|
924 *out_module = module.release(); |
|
925 return true; |
|
926 } |
|
927 |
|
928 } // namespace |
|
929 |
|
930 namespace google_breakpad { |
|
931 |
|
932 // Not explicitly exported, but not static so it can be used in unit tests. |
|
933 bool ReadSymbolDataInternal(const uint8_t* obj_file, |
|
934 const string& obj_filename, |
|
935 const std::vector<string>& debug_dirs, |
|
936 SymbolData symbol_data, |
|
937 Module** module) { |
|
938 |
|
939 if (!IsValidElf(obj_file)) { |
|
940 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); |
|
941 return false; |
|
942 } |
|
943 |
|
944 int elfclass = ElfClass(obj_file); |
|
945 if (elfclass == ELFCLASS32) { |
|
946 return ReadSymbolDataElfClass<ElfClass32>( |
|
947 reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs, |
|
948 symbol_data, module); |
|
949 } |
|
950 if (elfclass == ELFCLASS64) { |
|
951 return ReadSymbolDataElfClass<ElfClass64>( |
|
952 reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs, |
|
953 symbol_data, module); |
|
954 } |
|
955 |
|
956 return false; |
|
957 } |
|
958 |
|
959 bool WriteSymbolFile(const string &obj_file, |
|
960 const std::vector<string>& debug_dirs, |
|
961 SymbolData symbol_data, |
|
962 std::ostream &sym_stream) { |
|
963 Module* module; |
|
964 if (!ReadSymbolData(obj_file, debug_dirs, symbol_data, &module)) |
|
965 return false; |
|
966 |
|
967 bool result = module->Write(sym_stream, symbol_data); |
|
968 delete module; |
|
969 return result; |
|
970 } |
|
971 |
|
972 bool ReadSymbolData(const string& obj_file, |
|
973 const std::vector<string>& debug_dirs, |
|
974 SymbolData symbol_data, |
|
975 Module** module) { |
|
976 MmapWrapper map_wrapper; |
|
977 void* elf_header = NULL; |
|
978 if (!LoadELF(obj_file, &map_wrapper, &elf_header)) |
|
979 return false; |
|
980 |
|
981 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), |
|
982 obj_file, debug_dirs, symbol_data, module); |
|
983 } |
|
984 |
|
985 } // namespace google_breakpad |