|
1 // Copyright (c) 2010 Google Inc. |
|
2 // All rights reserved. |
|
3 // |
|
4 // Redistribution and use in source and binary forms, with or without |
|
5 // modification, are permitted provided that the following conditions are |
|
6 // met: |
|
7 // |
|
8 // * Redistributions of source code must retain the above copyright |
|
9 // notice, this list of conditions and the following disclaimer. |
|
10 // * Redistributions in binary form must reproduce the above |
|
11 // copyright notice, this list of conditions and the following disclaimer |
|
12 // in the documentation and/or other materials provided with the |
|
13 // distribution. |
|
14 // * Neither the name of Google Inc. nor the names of its |
|
15 // contributors may be used to endorse or promote products derived from |
|
16 // this software without specific prior written permission. |
|
17 // |
|
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 |
|
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
|
31 |
|
32 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h. |
|
33 |
|
34 // For <inttypes.h> PRI* macros, before anything else might #include it. |
|
35 #ifndef __STDC_FORMAT_MACROS |
|
36 #define __STDC_FORMAT_MACROS |
|
37 #endif /* __STDC_FORMAT_MACROS */ |
|
38 |
|
39 #include "common/dwarf_cu_to_module.h" |
|
40 |
|
41 #include <assert.h> |
|
42 #if !defined(__ANDROID__) |
|
43 #include <cxxabi.h> |
|
44 #endif |
|
45 #include <inttypes.h> |
|
46 |
|
47 #include <algorithm> |
|
48 #include <set> |
|
49 #include <utility> |
|
50 #include <iomanip> |
|
51 |
|
52 #include "common/dwarf_line_to_module.h" |
|
53 #include "common/logging.h" |
|
54 |
|
55 namespace google_breakpad { |
|
56 |
|
57 using std::map; |
|
58 using std::pair; |
|
59 using std::set; |
|
60 using std::sort; |
|
61 using std::vector; |
|
62 |
|
// Data provided by a DWARF specification DIE.
//
// In DWARF, the DIE for a definition may contain a DW_AT_specification
// attribute giving the offset of the corresponding declaration DIE, and
// the definition DIE may omit information given in the declaration. For
// example, it's common for a function's address range to appear only in
// its definition DIE, but its name to appear only in its declaration
// DIE.
//
// The dumper needs to be able to follow DW_AT_specification links to
// bring all this information together in a FUNC record. Conveniently,
// DIEs that are the target of such links have a DW_AT_declaration flag
// set, so we can identify them when we first see them, and record their
// contents for later reference.
//
// A Specification holds the names gathered from one declaration DIE, in
// case a later DIE's DW_AT_specification attribute points to it.
// GenericDIEHandler::ComputeQualifiedName fills in either qualified_name
// alone, or enclosing_name together with unqualified_name.
struct DwarfCUToModule::Specification {
  // The qualified name that can be found by demangling
  // DW_AT_MIPS_linkage_name. Empty if no demangled name was available.
  string qualified_name;

  // The name of the enclosing scope, or the empty string if there is none.
  string enclosing_name;

  // The name for the specification DIE itself, without any enclosing
  // name components.
  string unqualified_name;
};
|
91 |
|
// An abstract origin: the base definition of an inline function, which
// other DIEs may cite through a DW_AT_abstract_origin attribute.
struct AbstractOrigin {
  // Builds an origin whose name is the empty string.
  AbstractOrigin() {}

  // Builds an origin holding a copy of ORIGIN_NAME.
  AbstractOrigin(const string& origin_name) : name(origin_name) {}

  // The name recorded for the inline function.
  string name;
};
|
99 |
|
// Maps the offset of a DIE to the AbstractOrigin recorded for that DIE.
typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
|
101 |
|
// Data global to the DWARF-bearing file that is private to the
// DWARF-to-Module process.
struct DwarfCUToModule::FilePrivate {
  // A pool of strings shared by our data structures. Before storing a
  // string in one of them, insert it into this set, and then use the
  // string from the set.
  //
  // In some STL implementations, strings are reference-counted internally,
  // meaning that simply using strings from this set, even if passed by
  // value, assigned, or held directly in structures and containers
  // (map<string, ...>, for example), causes those strings to share a
  // single instance of each distinct piece of text. The original author
  // notes that GNU's libstdc++ used reference counts (and believed MSVC
  // did as well, at some point), but that C++11 implementations are
  // moving away from reference counting.
  //
  // In other implementations, string assignments copy the string's text,
  // so this set will actually hold yet another copy of the string (although
  // everything will still work). To improve memory consumption portably,
  // we will probably need to use pointers to strings held in this set.
  set<string> common_strings;

  // A map from offsets of DIEs within the .debug_info section to
  // Specifications describing those DIEs. Specification references can
  // cross compilation unit boundaries.
  SpecificationByOffset specifications;

  // A map from DIE offsets to the AbstractOrigins recorded for those DIEs.
  // FuncHandler consults it when a DIE carries a DW_AT_abstract_origin
  // attribute, and adds to it for DIEs that have a DW_AT_inline attribute
  // but no non-empty address range.
  AbstractOriginByOffset origins;
};
|
131 |
|
// Record FILENAME_ARG and MODULE_ARG, and allocate the FilePrivate that
// holds this file's private DWARF-to-Module state. The FilePrivate is
// released by ~FileContext.
DwarfCUToModule::FileContext::FileContext(const string &filename_arg,
                                          Module *module_arg)
    : filename(filename_arg), module(module_arg) {
  file_private = new FilePrivate();
}
|
137 |
|
// Release the FilePrivate allocated by the constructor.
DwarfCUToModule::FileContext::~FileContext() {
  delete file_private;
}
|
141 |
|
142 // Information global to the particular compilation unit we're |
|
143 // parsing. This is for data shared across the CU's entire DIE tree, |
|
144 // and parameters from the code invoking the CU parser. |
|
145 struct DwarfCUToModule::CUContext { |
|
146 CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg) |
|
147 : file_context(file_context_arg), |
|
148 reporter(reporter_arg), |
|
149 language(Language::CPlusPlus) { } |
|
150 ~CUContext() { |
|
151 for (vector<Module::Function *>::iterator it = functions.begin(); |
|
152 it != functions.end(); it++) |
|
153 delete *it; |
|
154 }; |
|
155 |
|
156 // The DWARF-bearing file into which this CU was incorporated. |
|
157 FileContext *file_context; |
|
158 |
|
159 // For printing error messages. |
|
160 WarningReporter *reporter; |
|
161 |
|
162 // The source language of this compilation unit. |
|
163 const Language *language; |
|
164 |
|
165 // The functions defined in this compilation unit. We accumulate |
|
166 // them here during parsing. Then, in DwarfCUToModule::Finish, we |
|
167 // assign them lines and add them to file_context->module. |
|
168 // |
|
169 // Destroying this destroys all the functions this vector points to. |
|
170 vector<Module::Function *> functions; |
|
171 }; |
|
172 |
|
// Information about the context of a particular DIE. This is for
// information that changes as we descend the tree towards the leaves:
// the containing classes/namespaces, etc.
struct DwarfCUToModule::DIEContext {
  // The fully-qualified name of the context. For example, for a
  // tree like:
  //
  //   DW_TAG_namespace Foo
  //     DW_TAG_class Bar
  //       DW_TAG_subprogram Baz
  //
  // in a C++ compilation unit, the DIEContext's name for the
  // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
  // name for the DW_TAG_namespace DIE would be "".
  string name;
};
|
189 |
|
// An abstract base class for all the dumper's DIE handlers. It collects
// the attributes needed to name a DIE (DW_AT_name, DW_AT_MIPS_linkage_name,
// DW_AT_declaration, DW_AT_specification) and computes qualified names.
class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler {
 public:
  // Create a handler for the DIE at OFFSET whose compilation unit is
  // described by CU_CONTEXT, and whose immediate context is described
  // by PARENT_CONTEXT.
  GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context,
                    uint64 offset)
      : cu_context_(cu_context),
        parent_context_(parent_context),
        offset_(offset),
        declaration_(false),
        specification_(NULL) { }

  // Derived classes' ProcessAttributeUnsigned can defer to this to
  // handle DW_AT_declaration, or simply not override it.
  void ProcessAttributeUnsigned(enum DwarfAttribute attr,
                                enum DwarfForm form,
                                uint64 data);

  // Derived classes' ProcessAttributeReference can defer to this to
  // handle DW_AT_specification, or simply not override it.
  void ProcessAttributeReference(enum DwarfAttribute attr,
                                 enum DwarfForm form,
                                 uint64 data);

  // Derived classes' ProcessAttributeString can defer to this to
  // handle DW_AT_name and DW_AT_MIPS_linkage_name, or simply not
  // override it.
  void ProcessAttributeString(enum DwarfAttribute attr,
                              enum DwarfForm form,
                              const string &data);

 protected:
  // Compute and return the fully-qualified name of the DIE. If this
  // DIE is a declaration DIE, to be cited by other DIEs'
  // DW_AT_specification attributes, record its names in the
  // specification table.
  //
  // Use this from EndAttributes member functions, not ProcessAttribute*
  // functions; only the former can be sure that all the DIE's attributes
  // have been seen.
  string ComputeQualifiedName();

  // The compilation unit this DIE belongs to.
  CUContext *cu_context_;

  // The context of the DIE's parent; supplies the enclosing name when
  // the DIE has no specification.
  DIEContext *parent_context_;

  // The offset of this DIE; used as its key in the specification table
  // and in warnings.
  uint64 offset_;

  // Place the name in the global set of strings and return the pooled
  // string. Even though this looks like a copy, in std::string
  // implementations that use reference counting internally the effect is
  // to have all the data structures share copies of strings whenever
  // possible.
  // FIXME: Should this return something like a string_ref to avoid the
  // assumption about how strings are implemented?
  string AddStringToPool(const string &str);

  // If this DIE has a DW_AT_declaration attribute, this is its value.
  // It is false on DIEs with no DW_AT_declaration attribute.
  bool declaration_;

  // If this DIE has a DW_AT_specification attribute, this is the
  // Specification structure for the DIE the attribute refers to.
  // Otherwise (or if the referenced DIE is unknown), this is NULL.
  Specification *specification_;

  // The value of the DW_AT_name attribute, or the empty string if the
  // DIE has no such attribute.
  string name_attribute_;

  // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
  // string if the DIE has no such attribute or its content could not be
  // demangled.
  string demangled_name_;
};
|
263 |
|
264 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( |
|
265 enum DwarfAttribute attr, |
|
266 enum DwarfForm form, |
|
267 uint64 data) { |
|
268 switch (attr) { |
|
269 case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break; |
|
270 default: break; |
|
271 } |
|
272 } |
|
273 |
|
274 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( |
|
275 enum DwarfAttribute attr, |
|
276 enum DwarfForm form, |
|
277 uint64 data) { |
|
278 switch (attr) { |
|
279 case dwarf2reader::DW_AT_specification: { |
|
280 // Find the Specification to which this attribute refers, and |
|
281 // set specification_ appropriately. We could do more processing |
|
282 // here, but it's better to leave the real work to our |
|
283 // EndAttribute member function, at which point we know we have |
|
284 // seen all the DIE's attributes. |
|
285 FileContext *file_context = cu_context_->file_context; |
|
286 SpecificationByOffset *specifications |
|
287 = &file_context->file_private->specifications; |
|
288 SpecificationByOffset::iterator spec = specifications->find(data); |
|
289 if (spec != specifications->end()) { |
|
290 specification_ = &spec->second; |
|
291 } else { |
|
292 // Technically, there's no reason a DW_AT_specification |
|
293 // couldn't be a forward reference, but supporting that would |
|
294 // be a lot of work (changing to a two-pass structure), and I |
|
295 // don't think any producers we care about ever emit such |
|
296 // things. |
|
297 cu_context_->reporter->UnknownSpecification(offset_, data); |
|
298 } |
|
299 break; |
|
300 } |
|
301 default: break; |
|
302 } |
|
303 } |
|
304 |
|
305 string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) { |
|
306 pair<set<string>::iterator, bool> result = |
|
307 cu_context_->file_context->file_private->common_strings.insert(str); |
|
308 return *result.first; |
|
309 } |
|
310 |
|
311 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( |
|
312 enum DwarfAttribute attr, |
|
313 enum DwarfForm form, |
|
314 const string &data) { |
|
315 switch (attr) { |
|
316 case dwarf2reader::DW_AT_name: |
|
317 name_attribute_ = AddStringToPool(data); |
|
318 break; |
|
319 case dwarf2reader::DW_AT_MIPS_linkage_name: { |
|
320 char* demangled = NULL; |
|
321 #if !defined(__ANDROID__) |
|
322 demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, NULL); |
|
323 #endif |
|
324 if (demangled) { |
|
325 demangled_name_ = AddStringToPool(demangled); |
|
326 free(reinterpret_cast<void*>(demangled)); |
|
327 } |
|
328 break; |
|
329 } |
|
330 default: break; |
|
331 } |
|
332 } |
|
333 |
|
334 string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { |
|
335 // Use the demangled name, if one is available. Demangled names are |
|
336 // preferable to those inferred from the DWARF structure because they |
|
337 // include argument types. |
|
338 const string *qualified_name = NULL; |
|
339 if (!demangled_name_.empty()) { |
|
340 // Found it is this DIE. |
|
341 qualified_name = &demangled_name_; |
|
342 } else if (specification_ && !specification_->qualified_name.empty()) { |
|
343 // Found it on the specification. |
|
344 qualified_name = &specification_->qualified_name; |
|
345 } |
|
346 |
|
347 const string *unqualified_name; |
|
348 const string *enclosing_name; |
|
349 if (!qualified_name) { |
|
350 // Find our unqualified name. If the DIE has its own DW_AT_name |
|
351 // attribute, then use that; otherwise, check our specification. |
|
352 if (name_attribute_.empty() && specification_) |
|
353 unqualified_name = &specification_->unqualified_name; |
|
354 else |
|
355 unqualified_name = &name_attribute_; |
|
356 |
|
357 // Find the name of our enclosing context. If we have a |
|
358 // specification, it's the specification's enclosing context that |
|
359 // counts; otherwise, use this DIE's context. |
|
360 if (specification_) |
|
361 enclosing_name = &specification_->enclosing_name; |
|
362 else |
|
363 enclosing_name = &parent_context_->name; |
|
364 } |
|
365 |
|
366 // If this DIE was marked as a declaration, record its names in the |
|
367 // specification table. |
|
368 if (declaration_) { |
|
369 FileContext *file_context = cu_context_->file_context; |
|
370 Specification spec; |
|
371 if (qualified_name) |
|
372 spec.qualified_name = *qualified_name; |
|
373 else { |
|
374 spec.enclosing_name = *enclosing_name; |
|
375 spec.unqualified_name = *unqualified_name; |
|
376 } |
|
377 file_context->file_private->specifications[offset_] = spec; |
|
378 } |
|
379 |
|
380 if (qualified_name) |
|
381 return *qualified_name; |
|
382 |
|
383 // Combine the enclosing name and unqualified name to produce our |
|
384 // own fully-qualified name. |
|
385 return cu_context_->language->MakeQualifiedName(*enclosing_name, |
|
386 *unqualified_name); |
|
387 } |
|
388 |
|
// A handler class for DW_TAG_subprogram DIEs. It gathers the function's
// name and address range, and in Finish either records a Module::Function
// in the CU context or, for DIEs with DW_AT_inline and no non-empty
// address range, records an AbstractOrigin.
class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
 public:
  // Create a handler for the subprogram DIE at OFFSET in the compilation
  // unit described by CU_CONTEXT, nested in PARENT_CONTEXT.
  FuncHandler(CUContext *cu_context, DIEContext *parent_context,
              uint64 offset)
      : GenericDIEHandler(cu_context, parent_context, offset),
        low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr),
        abstract_origin_(NULL), inline_(false) { }
  // Handles DW_AT_inline, DW_AT_low_pc and DW_AT_high_pc; defers
  // everything else to GenericDIEHandler.
  void ProcessAttributeUnsigned(enum DwarfAttribute attr,
                                enum DwarfForm form,
                                uint64 data);
  // Handles DW_AT_inline; ignores everything else.
  void ProcessAttributeSigned(enum DwarfAttribute attr,
                              enum DwarfForm form,
                              int64 data);
  // Handles DW_AT_abstract_origin; defers everything else to
  // GenericDIEHandler.
  void ProcessAttributeReference(enum DwarfAttribute attr,
                                 enum DwarfForm form,
                                 uint64 data);

  // Computes name_ once all attributes have been seen.
  bool EndAttributes();
  // Records the function or abstract origin described by this DIE.
  void Finish();

 private:
  // The fully-qualified name, as derived from name_attribute_,
  // specification_, parent_context_. Computed in EndAttributes.
  string name_;
  uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
  DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
  // The AbstractOrigin found for this DIE's DW_AT_abstract_origin
  // attribute, or NULL if there is no such attribute or its target is
  // unknown.
  const AbstractOrigin* abstract_origin_;
  // True if the DIE has a DW_AT_inline attribute, whatever its value.
  bool inline_;
};
|
419 |
|
420 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( |
|
421 enum DwarfAttribute attr, |
|
422 enum DwarfForm form, |
|
423 uint64 data) { |
|
424 switch (attr) { |
|
425 // If this attribute is present at all --- even if its value is |
|
426 // DW_INL_not_inlined --- then GCC may cite it as someone else's |
|
427 // DW_AT_abstract_origin attribute. |
|
428 case dwarf2reader::DW_AT_inline: inline_ = true; break; |
|
429 |
|
430 case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break; |
|
431 case dwarf2reader::DW_AT_high_pc: |
|
432 high_pc_form_ = form; |
|
433 high_pc_ = data; |
|
434 break; |
|
435 |
|
436 default: |
|
437 GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); |
|
438 break; |
|
439 } |
|
440 } |
|
441 |
|
442 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( |
|
443 enum DwarfAttribute attr, |
|
444 enum DwarfForm form, |
|
445 int64 data) { |
|
446 switch (attr) { |
|
447 // If this attribute is present at all --- even if its value is |
|
448 // DW_INL_not_inlined --- then GCC may cite it as someone else's |
|
449 // DW_AT_abstract_origin attribute. |
|
450 case dwarf2reader::DW_AT_inline: inline_ = true; break; |
|
451 |
|
452 default: |
|
453 break; |
|
454 } |
|
455 } |
|
456 |
|
457 void DwarfCUToModule::FuncHandler::ProcessAttributeReference( |
|
458 enum DwarfAttribute attr, |
|
459 enum DwarfForm form, |
|
460 uint64 data) { |
|
461 switch(attr) { |
|
462 case dwarf2reader::DW_AT_abstract_origin: { |
|
463 const AbstractOriginByOffset& origins = |
|
464 cu_context_->file_context->file_private->origins; |
|
465 AbstractOriginByOffset::const_iterator origin = origins.find(data); |
|
466 if (origin != origins.end()) { |
|
467 abstract_origin_ = &(origin->second); |
|
468 } else { |
|
469 cu_context_->reporter->UnknownAbstractOrigin(offset_, data); |
|
470 } |
|
471 break; |
|
472 } |
|
473 default: |
|
474 GenericDIEHandler::ProcessAttributeReference(attr, form, data); |
|
475 break; |
|
476 } |
|
477 } |
|
478 |
|
479 bool DwarfCUToModule::FuncHandler::EndAttributes() { |
|
480 // Compute our name, and record a specification, if appropriate. |
|
481 name_ = ComputeQualifiedName(); |
|
482 if (name_.empty() && abstract_origin_) { |
|
483 name_ = abstract_origin_->name; |
|
484 } |
|
485 return true; |
|
486 } |
|
487 |
|
488 void DwarfCUToModule::FuncHandler::Finish() { |
|
489 // Make high_pc_ an address, if it isn't already. |
|
490 if (high_pc_form_ != dwarf2reader::DW_FORM_addr) { |
|
491 high_pc_ += low_pc_; |
|
492 } |
|
493 |
|
494 // Did we collect the information we need? Not all DWARF function |
|
495 // entries have low and high addresses (for example, inlined |
|
496 // functions that were never used), but all the ones we're |
|
497 // interested in cover a non-empty range of bytes. |
|
498 if (low_pc_ < high_pc_) { |
|
499 // Create a Module::Function based on the data we've gathered, and |
|
500 // add it to the functions_ list. |
|
501 Module::Function *func = new Module::Function; |
|
502 // Malformed DWARF may omit the name, but all Module::Functions must |
|
503 // have names. |
|
504 if (!name_.empty()) { |
|
505 func->name = name_; |
|
506 } else { |
|
507 cu_context_->reporter->UnnamedFunction(offset_); |
|
508 func->name = "<name omitted>"; |
|
509 } |
|
510 func->address = low_pc_; |
|
511 func->size = high_pc_ - low_pc_; |
|
512 func->parameter_size = 0; |
|
513 if (func->address) { |
|
514 // If the function address is zero this is a sign that this function |
|
515 // description is just empty debug data and should just be discarded. |
|
516 cu_context_->functions.push_back(func); |
|
517 } |
|
518 } else if (inline_) { |
|
519 AbstractOrigin origin(name_); |
|
520 cu_context_->file_context->file_private->origins[offset_] = origin; |
|
521 } |
|
522 } |
|
523 |
|
// A handler for DIEs that contain functions and contribute a
// component to their names: namespaces, classes, etc.
class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
 public:
  // Create a handler for the scope DIE at OFFSET in the compilation unit
  // described by CU_CONTEXT, nested in PARENT_CONTEXT.
  NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context,
                    uint64 offset)
      : GenericDIEHandler(cu_context, parent_context, offset) { }
  // Computes this scope's qualified name for use by its children.
  bool EndAttributes();
  // Returns a new handler for subprogram and nested-scope children, or
  // NULL for children of any other tag.
  DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag);

 private:
  DIEContext child_context_; // A context for our children.
};
|
537 |
|
// Now that all attributes have been seen, compute this scope's
// fully-qualified name and make it the enclosing name for our children.
// Always returns true.
bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
  child_context_.name = ComputeQualifiedName();
  return true;
}
|
542 |
|
543 dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler( |
|
544 uint64 offset, |
|
545 enum DwarfTag tag) { |
|
546 switch (tag) { |
|
547 case dwarf2reader::DW_TAG_subprogram: |
|
548 return new FuncHandler(cu_context_, &child_context_, offset); |
|
549 case dwarf2reader::DW_TAG_namespace: |
|
550 case dwarf2reader::DW_TAG_class_type: |
|
551 case dwarf2reader::DW_TAG_structure_type: |
|
552 case dwarf2reader::DW_TAG_union_type: |
|
553 return new NamedScopeHandler(cu_context_, &child_context_, offset); |
|
554 default: |
|
555 return NULL; |
|
556 } |
|
557 } |
|
558 |
|
559 void DwarfCUToModule::WarningReporter::CUHeading() { |
|
560 if (printed_cu_header_) |
|
561 return; |
|
562 BPLOG(INFO) |
|
563 << filename_ << ": in compilation unit '" << cu_name_ |
|
564 << "' (offset 0x" << std::setbase(16) << cu_offset_ << std::setbase(10) |
|
565 << "):"; |
|
566 printed_cu_header_ = true; |
|
567 } |
|
568 |
|
// Report that the DIE at OFFSET has a DW_AT_specification attribute
// citing the DIE at TARGET, for which no specification was recorded.
// Both offsets are printed in hex; the stream is switched back to
// decimal after each one.
void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset,
                                                            uint64 target) {
  CUHeading();
  BPLOG(INFO)
      << filename_ << ": the DIE at offset 0x"
      << std::setbase(16) << offset << std::setbase(10)
      << " has a DW_AT_specification attribute referring to the die at offset 0x"
      << std::setbase(16) << target << std::setbase(10)
      << ", which either was not marked as a declaration, or comes "
      << "later in the file";
}
|
580 |
|
// Report that the DIE at OFFSET has a DW_AT_abstract_origin attribute
// citing the DIE at TARGET, for which no abstract origin was recorded.
// Both offsets are printed in hex; the stream is switched back to
// decimal after each one.
void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset,
                                                             uint64 target) {
  CUHeading();
  BPLOG(INFO)
      << filename_ << ": the DIE at offset 0x"
      << std::setbase(16) << offset << std::setbase(10)
      << " has a DW_AT_abstract_origin attribute referring to the die at"
      << " offset 0x" << std::setbase(16) << target << std::setbase(10)
      << ", which either was not marked as an inline, or comes "
      << "later in the file";
}
|
592 |
|
// Report that the DWARF section called NAME could not be found.
void DwarfCUToModule::WarningReporter::MissingSection(const string &name) {
  CUHeading();
  BPLOG(INFO) << filename_ << ": warning: couldn't find DWARF '"
              << name << "' section";
}
|
598 |
|
// Report that a line number data offset lies beyond the end of the
// '.debug_line' section. Note that OFFSET itself is not included in the
// logged message.
void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) {
  CUHeading();
  BPLOG(INFO) << filename_ << ": warning: line number data offset beyond "
              << "end of '.debug_line' section";
}
|
604 |
|
605 void DwarfCUToModule::WarningReporter::UncoveredHeading() { |
|
606 if (printed_unpaired_header_) |
|
607 return; |
|
608 CUHeading(); |
|
609 BPLOG(INFO) << filename_ << ": warning: skipping unpaired lines/functions:"; |
|
610 printed_unpaired_header_ = true; |
|
611 } |
|
612 |
|
613 void DwarfCUToModule::WarningReporter::UncoveredFunction( |
|
614 const Module::Function &function) { |
|
615 if (!uncovered_warnings_enabled_) |
|
616 return; |
|
617 UncoveredHeading(); |
|
618 BPLOG(INFO) << " function" << (function.size == 0 ? " (zero-length)" : "") |
|
619 << ": " << function.name; |
|
620 } |
|
621 |
|
622 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) { |
|
623 if (!uncovered_warnings_enabled_) |
|
624 return; |
|
625 UncoveredHeading(); |
|
626 BPLOG(INFO) << " line" << (line.size == 0 ? " (zero-length)" : "") |
|
627 << ": " << line.file->name << ":" << line.number |
|
628 << " at 0x" << std::setbase(16) << line.address << std::setbase(10); |
|
629 } |
|
630 |
|
// Report that the function DIE at OFFSET (printed in hex) has no name.
void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) {
  CUHeading();
  BPLOG(INFO) << filename_ << ": warning: function at offset 0x"
              << std::setbase(16) << offset << std::setbase(10) << " has no name";
}
|
636 |
|
// Create a handler for one compilation unit. LINE_READER is kept for
// reading line number data; FILE_CONTEXT and REPORTER are stored in a
// newly allocated CUContext. A fresh DIEContext is allocated as the
// context for the CU's top-level children. Both allocations are released
// by the destructor.
DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
                                 LineToModuleHandler *line_reader,
                                 WarningReporter *reporter)
    : line_reader_(line_reader), has_source_line_info_(false) {
  cu_context_ = new CUContext(file_context, reporter);
  child_context_ = new DIEContext();
}
|
644 |
|
// Release the CUContext (and with it any functions it still holds) and
// the DIEContext allocated by the constructor.
DwarfCUToModule::~DwarfCUToModule() {
  delete cu_context_;
  delete child_context_;
}
|
649 |
|
650 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr, |
|
651 enum DwarfForm form, |
|
652 int64 data) { |
|
653 switch (attr) { |
|
654 case dwarf2reader::DW_AT_language: // source language of this CU |
|
655 SetLanguage(static_cast<DwarfLanguage>(data)); |
|
656 break; |
|
657 default: |
|
658 break; |
|
659 } |
|
660 } |
|
661 |
|
662 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, |
|
663 enum DwarfForm form, |
|
664 uint64 data) { |
|
665 switch (attr) { |
|
666 case dwarf2reader::DW_AT_stmt_list: // Line number information. |
|
667 has_source_line_info_ = true; |
|
668 source_line_offset_ = data; |
|
669 break; |
|
670 case dwarf2reader::DW_AT_language: // source language of this CU |
|
671 SetLanguage(static_cast<DwarfLanguage>(data)); |
|
672 break; |
|
673 default: |
|
674 break; |
|
675 } |
|
676 } |
|
677 |
|
678 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, |
|
679 enum DwarfForm form, |
|
680 const string &data) { |
|
681 switch (attr) { |
|
682 case dwarf2reader::DW_AT_name: |
|
683 cu_context_->reporter->SetCUName(data); |
|
684 break; |
|
685 case dwarf2reader::DW_AT_comp_dir: |
|
686 line_reader_->StartCompilationUnit(data); |
|
687 break; |
|
688 default: |
|
689 break; |
|
690 } |
|
691 } |
|
692 |
|
// Nothing needs doing once the CU DIE's attributes have been seen;
// always returns true.
bool DwarfCUToModule::EndAttributes() {
  return true;
}
|
696 |
|
697 dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler( |
|
698 uint64 offset, |
|
699 enum DwarfTag tag) { |
|
700 switch (tag) { |
|
701 case dwarf2reader::DW_TAG_subprogram: |
|
702 return new FuncHandler(cu_context_, child_context_, offset); |
|
703 case dwarf2reader::DW_TAG_namespace: |
|
704 case dwarf2reader::DW_TAG_class_type: |
|
705 case dwarf2reader::DW_TAG_structure_type: |
|
706 case dwarf2reader::DW_TAG_union_type: |
|
707 return new NamedScopeHandler(cu_context_, child_context_, offset); |
|
708 default: |
|
709 return NULL; |
|
710 } |
|
711 } |
|
712 |
|
713 void DwarfCUToModule::SetLanguage(DwarfLanguage language) { |
|
714 switch (language) { |
|
715 case dwarf2reader::DW_LANG_Java: |
|
716 cu_context_->language = Language::Java; |
|
717 break; |
|
718 |
|
719 // DWARF has no generic language code for assembly language; this is |
|
720 // what the GNU toolchain uses. |
|
721 case dwarf2reader::DW_LANG_Mips_Assembler: |
|
722 cu_context_->language = Language::Assembler; |
|
723 break; |
|
724 |
|
725 // C++ covers so many cases that it probably has some way to cope |
|
726 // with whatever the other languages throw at us. So make it the |
|
727 // default. |
|
728 // |
|
729 // Objective C and Objective C++ seem to create entries for |
|
730 // methods whose DW_AT_name values are already fully-qualified: |
|
731 // "-[Classname method:]". These appear at the top level. |
|
732 // |
|
733 // DWARF data for C should never include namespaces or functions |
|
734 // nested in struct types, but if it ever does, then C++'s |
|
735 // notation is probably not a bad choice for that. |
|
736 default: |
|
737 case dwarf2reader::DW_LANG_ObjC: |
|
738 case dwarf2reader::DW_LANG_ObjC_plus_plus: |
|
739 case dwarf2reader::DW_LANG_C: |
|
740 case dwarf2reader::DW_LANG_C89: |
|
741 case dwarf2reader::DW_LANG_C99: |
|
742 case dwarf2reader::DW_LANG_C_plus_plus: |
|
743 cu_context_->language = Language::CPlusPlus; |
|
744 break; |
|
745 } |
|
746 } |
|
747 |
|
748 void DwarfCUToModule::ReadSourceLines(uint64 offset) { |
|
749 const dwarf2reader::SectionMap §ion_map |
|
750 = cu_context_->file_context->section_map; |
|
751 dwarf2reader::SectionMap::const_iterator map_entry |
|
752 = section_map.find(".debug_line"); |
|
753 // Mac OS X puts DWARF data in sections whose names begin with "__" |
|
754 // instead of ".". |
|
755 if (map_entry == section_map.end()) |
|
756 map_entry = section_map.find("__debug_line"); |
|
757 if (map_entry == section_map.end()) { |
|
758 cu_context_->reporter->MissingSection(".debug_line"); |
|
759 return; |
|
760 } |
|
761 const char *section_start = map_entry->second.first; |
|
762 uint64 section_length = map_entry->second.second; |
|
763 if (offset >= section_length) { |
|
764 cu_context_->reporter->BadLineInfoOffset(offset); |
|
765 return; |
|
766 } |
|
767 line_reader_->ReadProgram(section_start + offset, section_length - offset, |
|
768 cu_context_->file_context->module, &lines_); |
|
769 } |
|
770 |
|
namespace {
// Return true if ADDRESS falls within the range of ITEM, that is, within
// the item.size bytes starting at item.address. ITEM may be any type with
// `address` and `size` members.
template <class T>
inline bool within(const T &item, Module::Address address) {
  // Because Module::Address is unsigned, and unsigned arithmetic
  // wraps around, this will be false if ADDRESS falls before the
  // start of ITEM, or if it falls after ITEM's end. A single comparison
  // therefore checks both bounds.
  return address - item.address < item.size;
}
}  // namespace
|
781 |
|
782 void DwarfCUToModule::AssignLinesToFunctions() { |
|
783 vector<Module::Function *> *functions = &cu_context_->functions; |
|
784 WarningReporter *reporter = cu_context_->reporter; |
|
785 |
|
786 // This would be simpler if we assumed that source line entries |
|
787 // don't cross function boundaries. However, there's no real reason |
|
788 // to assume that (say) a series of function definitions on the same |
|
789 // line wouldn't get coalesced into one line number entry. The |
|
790 // DWARF spec certainly makes no such promises. |
|
791 // |
|
792 // So treat the functions and lines as peers, and take the trouble |
|
793 // to compute their ranges' intersections precisely. In any case, |
|
794 // the hair here is a constant factor for performance; the |
|
795 // complexity from here on out is linear. |
|
796 |
|
797 // Put both our functions and lines in order by address. |
|
798 std::sort(functions->begin(), functions->end(), |
|
799 Module::Function::CompareByAddress); |
|
800 std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress); |
|
801 |
|
802 // The last line that we used any piece of. We use this only for |
|
803 // generating warnings. |
|
804 const Module::Line *last_line_used = NULL; |
|
805 |
|
806 // The last function and line we warned about --- so we can avoid |
|
807 // doing so more than once. |
|
808 const Module::Function *last_function_cited = NULL; |
|
809 const Module::Line *last_line_cited = NULL; |
|
810 |
|
811 // Make a single pass through both vectors from lower to higher |
|
812 // addresses, populating each Function's lines vector with lines |
|
813 // from our lines_ vector that fall within the function's address |
|
814 // range. |
|
815 vector<Module::Function *>::iterator func_it = functions->begin(); |
|
816 vector<Module::Line>::const_iterator line_it = lines_.begin(); |
|
817 |
|
818 Module::Address current; |
|
819 |
|
820 // Pointers to the referents of func_it and line_it, or NULL if the |
|
821 // iterator is at the end of the sequence. |
|
822 Module::Function *func; |
|
823 const Module::Line *line; |
|
824 |
|
825 // Start current at the beginning of the first line or function, |
|
826 // whichever is earlier. |
|
827 if (func_it != functions->end() && line_it != lines_.end()) { |
|
828 func = *func_it; |
|
829 line = &*line_it; |
|
830 current = std::min(func->address, line->address); |
|
831 } else if (line_it != lines_.end()) { |
|
832 func = NULL; |
|
833 line = &*line_it; |
|
834 current = line->address; |
|
835 } else if (func_it != functions->end()) { |
|
836 func = *func_it; |
|
837 line = NULL; |
|
838 current = (*func_it)->address; |
|
839 } else { |
|
840 return; |
|
841 } |
|
842 |
|
843 while (func || line) { |
|
844 // This loop has two invariants that hold at the top. |
|
845 // |
|
846 // First, at least one of the iterators is not at the end of its |
|
847 // sequence, and those that are not refer to the earliest |
|
848 // function or line that contains or starts after CURRENT. |
|
849 // |
|
850 // Note that every byte is in one of four states: it is covered |
|
851 // or not covered by a function, and, independently, it is |
|
852 // covered or not covered by a line. |
|
853 // |
|
854 // The second invariant is that CURRENT refers to a byte whose |
|
855 // state is different from its predecessor, or it refers to the |
|
856 // first byte in the address space. In other words, CURRENT is |
|
857 // always the address of a transition. |
|
858 // |
|
859 // Note that, although each iteration advances CURRENT from one |
|
860 // transition address to the next in each iteration, it might |
|
861 // not advance the iterators. Suppose we have a function that |
|
862 // starts with a line, has a gap, and then a second line, and |
|
863 // suppose that we enter an iteration with CURRENT at the end of |
|
864 // the first line. The next transition address is the start of |
|
865 // the second line, after the gap, so the iteration should |
|
866 // advance CURRENT to that point. At the head of that iteration, |
|
867 // the invariants require that the line iterator be pointing at |
|
868 // the second line. But this is also true at the head of the |
|
869 // next. And clearly, the iteration must not change the function |
|
870 // iterator. So neither iterator moves. |
|
871 |
|
872 // Assert the first invariant (see above). |
|
873 assert(!func || current < func->address || within(*func, current)); |
|
874 assert(!line || current < line->address || within(*line, current)); |
|
875 |
|
876 // The next transition after CURRENT. |
|
877 Module::Address next_transition; |
|
878 |
|
879 // Figure out which state we're in, add lines or warn, and compute |
|
880 // the next transition address. |
|
881 if (func && current >= func->address) { |
|
882 if (line && current >= line->address) { |
|
883 // Covered by both a line and a function. |
|
884 Module::Address func_left = func->size - (current - func->address); |
|
885 Module::Address line_left = line->size - (current - line->address); |
|
886 // This may overflow, but things work out. |
|
887 next_transition = current + std::min(func_left, line_left); |
|
888 Module::Line l = *line; |
|
889 l.address = current; |
|
890 l.size = next_transition - current; |
|
891 func->lines.push_back(l); |
|
892 last_line_used = line; |
|
893 } else { |
|
894 // Covered by a function, but no line. |
|
895 if (func != last_function_cited) { |
|
896 reporter->UncoveredFunction(*func); |
|
897 last_function_cited = func; |
|
898 } |
|
899 if (line && within(*func, line->address)) |
|
900 next_transition = line->address; |
|
901 else |
|
902 // If this overflows, we'll catch it below. |
|
903 next_transition = func->address + func->size; |
|
904 } |
|
905 } else { |
|
906 if (line && current >= line->address) { |
|
907 // Covered by a line, but no function. |
|
908 // |
|
909 // If GCC emits padding after one function to align the start |
|
910 // of the next, then it will attribute the padding |
|
911 // instructions to the last source line of function (to reduce |
|
912 // the size of the line number info), but omit it from the |
|
913 // DW_AT_{low,high}_pc range given in .debug_info (since it |
|
914 // costs nothing to be precise there). If we did use at least |
|
915 // some of the line we're about to skip, and it ends at the |
|
916 // start of the next function, then assume this is what |
|
917 // happened, and don't warn. |
|
918 if (line != last_line_cited |
|
919 && !(func |
|
920 && line == last_line_used |
|
921 && func->address - line->address == line->size)) { |
|
922 reporter->UncoveredLine(*line); |
|
923 last_line_cited = line; |
|
924 } |
|
925 if (func && within(*line, func->address)) |
|
926 next_transition = func->address; |
|
927 else |
|
928 // If this overflows, we'll catch it below. |
|
929 next_transition = line->address + line->size; |
|
930 } else { |
|
931 // Covered by neither a function nor a line. By the invariant, |
|
932 // both func and line begin after CURRENT. The next transition |
|
933 // is the start of the next function or next line, whichever |
|
934 // is earliest. |
|
935 assert (func || line); |
|
936 if (func && line) |
|
937 next_transition = std::min(func->address, line->address); |
|
938 else if (func) |
|
939 next_transition = func->address; |
|
940 else |
|
941 next_transition = line->address; |
|
942 } |
|
943 } |
|
944 |
|
945 // If a function or line abuts the end of the address space, then |
|
946 // next_transition may end up being zero, in which case we've completed |
|
947 // our pass. Handle that here, instead of trying to deal with it in |
|
948 // each place we compute next_transition. |
|
949 if (!next_transition) |
|
950 break; |
|
951 |
|
952 // Advance iterators as needed. If lines overlap or functions overlap, |
|
953 // then we could go around more than once. We don't worry too much |
|
954 // about what result we produce in that case, just as long as we don't |
|
955 // hang or crash. |
|
956 while (func_it != functions->end() |
|
957 && next_transition >= (*func_it)->address |
|
958 && !within(**func_it, next_transition)) |
|
959 func_it++; |
|
960 func = (func_it != functions->end()) ? *func_it : NULL; |
|
961 while (line_it != lines_.end() |
|
962 && next_transition >= line_it->address |
|
963 && !within(*line_it, next_transition)) |
|
964 line_it++; |
|
965 line = (line_it != lines_.end()) ? &*line_it : NULL; |
|
966 |
|
967 // We must make progress. |
|
968 assert(next_transition > current); |
|
969 current = next_transition; |
|
970 } |
|
971 } |
|
972 |
|
973 void DwarfCUToModule::Finish() { |
|
974 // Assembly language files have no function data, and that gives us |
|
975 // no place to store our line numbers (even though the GNU toolchain |
|
976 // will happily produce source line info for assembly language |
|
977 // files). To avoid spurious warnings about lines we can't assign |
|
978 // to functions, skip CUs in languages that lack functions. |
|
979 if (!cu_context_->language->HasFunctions()) |
|
980 return; |
|
981 |
|
982 // Read source line info, if we have any. |
|
983 if (has_source_line_info_) |
|
984 ReadSourceLines(source_line_offset_); |
|
985 |
|
986 vector<Module::Function *> *functions = &cu_context_->functions; |
|
987 |
|
988 // Dole out lines to the appropriate functions. |
|
989 AssignLinesToFunctions(); |
|
990 |
|
991 // Add our functions, which now have source lines assigned to them, |
|
992 // to module_. |
|
993 cu_context_->file_context->module->AddFunctions(functions->begin(), |
|
994 functions->end()); |
|
995 |
|
996 // Ownership of the function objects has shifted from cu_context to |
|
997 // the Module. |
|
998 functions->clear(); |
|
999 } |
|
1000 |
|
1001 bool DwarfCUToModule::StartCompilationUnit(uint64 offset, |
|
1002 uint8 address_size, |
|
1003 uint8 offset_size, |
|
1004 uint64 cu_length, |
|
1005 uint8 dwarf_version) { |
|
1006 return dwarf_version >= 2; |
|
1007 } |
|
1008 |
|
1009 bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) { |
|
1010 // We don't deal with partial compilation units (the only other tag |
|
1011 // likely to be used for root DIE). |
|
1012 return tag == dwarf2reader::DW_TAG_compile_unit; |
|
1013 } |
|
1014 |
|
1015 } // namespace google_breakpad |