toolkit/crashreporter/google-breakpad/src/common/dwarf_cu_to_module.cc

branch
TOR_BUG_3246
changeset 7
129ffea94266
equal deleted inserted replaced
-1:000000000000 0:dab46aedb8f4
1 // Copyright (c) 2010 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
32 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
33
34 // For <inttypes.h> PRI* macros, before anything else might #include it.
35 #ifndef __STDC_FORMAT_MACROS
36 #define __STDC_FORMAT_MACROS
37 #endif /* __STDC_FORMAT_MACROS */
38
39 #include "common/dwarf_cu_to_module.h"
40
41 #include <assert.h>
42 #if !defined(__ANDROID__)
43 #include <cxxabi.h>
44 #endif
45 #include <inttypes.h>
46
47 #include <algorithm>
48 #include <set>
49 #include <utility>
50 #include <iomanip>
51
52 #include "common/dwarf_line_to_module.h"
53 #include "common/logging.h"
54
55 namespace google_breakpad {
56
57 using std::map;
58 using std::pair;
59 using std::set;
60 using std::sort;
61 using std::vector;
62
63 // Data provided by a DWARF specification DIE.
64 //
65 // In DWARF, the DIE for a definition may contain a DW_AT_specification
66 // attribute giving the offset of the corresponding declaration DIE, and
67 // the definition DIE may omit information given in the declaration. For
68 // example, it's common for a function's address range to appear only in
69 // its definition DIE, but its name to appear only in its declaration
70 // DIE.
71 //
72 // The dumper needs to be able to follow DW_AT_specification links to
73 // bring all this information together in a FUNC record. Conveniently,
74 // DIEs that are the target of such links have a DW_AT_declaration flag
75 // set, so we can identify them when we first see them, and record their
76 // contents for later reference.
77 //
78 // A Specification holds information gathered from a declaration DIE that
79 // we may need if we find a DW_AT_specification link pointing to it.
80 struct DwarfCUToModule::Specification {
81 // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
82 string qualified_name;
83
84 // The name of the enclosing scope, or the empty string if there is none.
85 string enclosing_name;
86
87 // The name for the specification DIE itself, without any enclosing
88 // name components.
89 string unqualified_name;
90 };
91
// An abstract origin: the base definition of an inline function. Only
// the function's name is kept.
struct AbstractOrigin {
  // Creates an origin with an empty name.
  AbstractOrigin() {}

  // Creates an origin named ORIGIN_NAME.
  AbstractOrigin(const string& origin_name) : name(origin_name) {}

  string name;
};
99
100 typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
101
102 // Data global to the DWARF-bearing file that is private to the
103 // DWARF-to-Module process.
104 struct DwarfCUToModule::FilePrivate {
105 // A set of strings used in this CU. Before storing a string in one of
106 // our data structures, insert it into this set, and then use the string
107 // from the set.
108 //
109 // In some STL implementations, strings are reference-counted internally,
110 // meaning that simply using strings from this set, even if passed by
111 // value, assigned, or held directly in structures and containers
112 // (map<string, ...>, for example), causes those strings to share a
113 // single instance of each distinct piece of text. GNU's libstdc++ uses
114 // reference counts, and I believe MSVC did as well, at some point.
115 // However, C++ '11 implementations are moving away from reference
116 // counting.
117 //
118 // In other implementations, string assignments copy the string's text,
119 // so this set will actually hold yet another copy of the string (although
120 // everything will still work). To improve memory consumption portably,
121 // we will probably need to use pointers to strings held in this set.
122 set<string> common_strings;
123
124 // A map from offsets of DIEs within the .debug_info section to
125 // Specifications describing those DIEs. Specification references can
126 // cross compilation unit boundaries.
127 SpecificationByOffset specifications;
128
129 AbstractOriginByOffset origins;
130 };
131
132 DwarfCUToModule::FileContext::FileContext(const string &filename_arg,
133 Module *module_arg)
134 : filename(filename_arg), module(module_arg) {
135 file_private = new FilePrivate();
136 }
137
138 DwarfCUToModule::FileContext::~FileContext() {
139 delete file_private;
140 }
141
142 // Information global to the particular compilation unit we're
143 // parsing. This is for data shared across the CU's entire DIE tree,
144 // and parameters from the code invoking the CU parser.
145 struct DwarfCUToModule::CUContext {
146 CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg)
147 : file_context(file_context_arg),
148 reporter(reporter_arg),
149 language(Language::CPlusPlus) { }
150 ~CUContext() {
151 for (vector<Module::Function *>::iterator it = functions.begin();
152 it != functions.end(); it++)
153 delete *it;
154 };
155
156 // The DWARF-bearing file into which this CU was incorporated.
157 FileContext *file_context;
158
159 // For printing error messages.
160 WarningReporter *reporter;
161
162 // The source language of this compilation unit.
163 const Language *language;
164
165 // The functions defined in this compilation unit. We accumulate
166 // them here during parsing. Then, in DwarfCUToModule::Finish, we
167 // assign them lines and add them to file_context->module.
168 //
169 // Destroying this destroys all the functions this vector points to.
170 vector<Module::Function *> functions;
171 };
172
173 // Information about the context of a particular DIE. This is for
174 // information that changes as we descend the tree towards the leaves:
175 // the containing classes/namespaces, etc.
176 struct DwarfCUToModule::DIEContext {
177 // The fully-qualified name of the context. For example, for a
178 // tree like:
179 //
180 // DW_TAG_namespace Foo
181 // DW_TAG_class Bar
182 // DW_TAG_subprogram Baz
183 //
184 // in a C++ compilation unit, the DIEContext's name for the
185 // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
186 // name for the DW_TAG_namespace DIE would be "".
187 string name;
188 };
189
// An abstract base class for all the dumper's DIE handlers. It collects
// the attributes needed to name a DIE (DW_AT_declaration,
// DW_AT_specification, DW_AT_name and DW_AT_MIPS_linkage_name) and
// provides ComputeQualifiedName to combine them.
class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler {
 public:
  // Create a handler for the DIE at OFFSET whose compilation unit is
  // described by CU_CONTEXT, and whose immediate context is described
  // by PARENT_CONTEXT.
  GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context,
                    uint64 offset)
      : cu_context_(cu_context),
        parent_context_(parent_context),
        offset_(offset),
        declaration_(false),
        specification_(NULL) { }

  // Derived classes' ProcessAttributeUnsigned can defer to this to
  // handle DW_AT_declaration, or simply not override it.
  void ProcessAttributeUnsigned(enum DwarfAttribute attr,
                                enum DwarfForm form,
                                uint64 data);

  // Derived classes' ProcessAttributeReference can defer to this to
  // handle DW_AT_specification, or simply not override it.
  void ProcessAttributeReference(enum DwarfAttribute attr,
                                 enum DwarfForm form,
                                 uint64 data);

  // Derived classes' ProcessAttributeString can defer to this to
  // handle DW_AT_name and DW_AT_MIPS_linkage_name, or simply not
  // override it.
  void ProcessAttributeString(enum DwarfAttribute attr,
                              enum DwarfForm form,
                              const string &data);

 protected:
  // Compute and return the fully-qualified name of the DIE. If this
  // DIE is a declaration DIE, to be cited by other DIEs'
  // DW_AT_specification attributes, record its names (the demangled
  // qualified name if one is known, otherwise its enclosing name and
  // unqualified name) in the specification table.
  //
  // Use this from EndAttributes member functions, not ProcessAttribute*
  // functions; only the former can be sure that all the DIE's attributes
  // have been seen.
  string ComputeQualifiedName();

  // The compilation unit this DIE belongs to.
  CUContext *cu_context_;
  // The context of this DIE's parent.
  DIEContext *parent_context_;
  // The offset of this DIE, as passed to the constructor.
  uint64 offset_;

  // Place STR in the file-wide set of strings and return the pooled
  // string by value. With std::string implementations that use
  // reference counting internally, the effect is to have all the data
  // structures share copies of strings whenever possible; with other
  // implementations the returned value is simply a copy.
  // FIXME: Should this return something like a string_ref to avoid the
  // assumption about how strings are implemented?
  string AddStringToPool(const string &str);

  // If this DIE has a DW_AT_declaration attribute, this is its value.
  // It is false on DIEs with no DW_AT_declaration attribute.
  bool declaration_;

  // If this DIE has a DW_AT_specification attribute, this is the
  // Specification structure for the DIE the attribute refers to.
  // Otherwise (or if the referenced DIE is not in the specification
  // table), this is NULL.
  Specification *specification_;

  // The value of the DW_AT_name attribute, or the empty string if the
  // DIE has no such attribute.
  string name_attribute_;

  // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
  // string if the DIE has no such attribute or its content could not be
  // demangled.
  string demangled_name_;
};
263
264 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
265 enum DwarfAttribute attr,
266 enum DwarfForm form,
267 uint64 data) {
268 switch (attr) {
269 case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break;
270 default: break;
271 }
272 }
273
274 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
275 enum DwarfAttribute attr,
276 enum DwarfForm form,
277 uint64 data) {
278 switch (attr) {
279 case dwarf2reader::DW_AT_specification: {
280 // Find the Specification to which this attribute refers, and
281 // set specification_ appropriately. We could do more processing
282 // here, but it's better to leave the real work to our
283 // EndAttribute member function, at which point we know we have
284 // seen all the DIE's attributes.
285 FileContext *file_context = cu_context_->file_context;
286 SpecificationByOffset *specifications
287 = &file_context->file_private->specifications;
288 SpecificationByOffset::iterator spec = specifications->find(data);
289 if (spec != specifications->end()) {
290 specification_ = &spec->second;
291 } else {
292 // Technically, there's no reason a DW_AT_specification
293 // couldn't be a forward reference, but supporting that would
294 // be a lot of work (changing to a two-pass structure), and I
295 // don't think any producers we care about ever emit such
296 // things.
297 cu_context_->reporter->UnknownSpecification(offset_, data);
298 }
299 break;
300 }
301 default: break;
302 }
303 }
304
305 string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) {
306 pair<set<string>::iterator, bool> result =
307 cu_context_->file_context->file_private->common_strings.insert(str);
308 return *result.first;
309 }
310
311 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
312 enum DwarfAttribute attr,
313 enum DwarfForm form,
314 const string &data) {
315 switch (attr) {
316 case dwarf2reader::DW_AT_name:
317 name_attribute_ = AddStringToPool(data);
318 break;
319 case dwarf2reader::DW_AT_MIPS_linkage_name: {
320 char* demangled = NULL;
321 #if !defined(__ANDROID__)
322 demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, NULL);
323 #endif
324 if (demangled) {
325 demangled_name_ = AddStringToPool(demangled);
326 free(reinterpret_cast<void*>(demangled));
327 }
328 break;
329 }
330 default: break;
331 }
332 }
333
334 string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
335 // Use the demangled name, if one is available. Demangled names are
336 // preferable to those inferred from the DWARF structure because they
337 // include argument types.
338 const string *qualified_name = NULL;
339 if (!demangled_name_.empty()) {
340 // Found it is this DIE.
341 qualified_name = &demangled_name_;
342 } else if (specification_ && !specification_->qualified_name.empty()) {
343 // Found it on the specification.
344 qualified_name = &specification_->qualified_name;
345 }
346
347 const string *unqualified_name;
348 const string *enclosing_name;
349 if (!qualified_name) {
350 // Find our unqualified name. If the DIE has its own DW_AT_name
351 // attribute, then use that; otherwise, check our specification.
352 if (name_attribute_.empty() && specification_)
353 unqualified_name = &specification_->unqualified_name;
354 else
355 unqualified_name = &name_attribute_;
356
357 // Find the name of our enclosing context. If we have a
358 // specification, it's the specification's enclosing context that
359 // counts; otherwise, use this DIE's context.
360 if (specification_)
361 enclosing_name = &specification_->enclosing_name;
362 else
363 enclosing_name = &parent_context_->name;
364 }
365
366 // If this DIE was marked as a declaration, record its names in the
367 // specification table.
368 if (declaration_) {
369 FileContext *file_context = cu_context_->file_context;
370 Specification spec;
371 if (qualified_name)
372 spec.qualified_name = *qualified_name;
373 else {
374 spec.enclosing_name = *enclosing_name;
375 spec.unqualified_name = *unqualified_name;
376 }
377 file_context->file_private->specifications[offset_] = spec;
378 }
379
380 if (qualified_name)
381 return *qualified_name;
382
383 // Combine the enclosing name and unqualified name to produce our
384 // own fully-qualified name.
385 return cu_context_->language->MakeQualifiedName(*enclosing_name,
386 *unqualified_name);
387 }
388
389 // A handler class for DW_TAG_subprogram DIEs.
390 class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
391 public:
392 FuncHandler(CUContext *cu_context, DIEContext *parent_context,
393 uint64 offset)
394 : GenericDIEHandler(cu_context, parent_context, offset),
395 low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr),
396 abstract_origin_(NULL), inline_(false) { }
397 void ProcessAttributeUnsigned(enum DwarfAttribute attr,
398 enum DwarfForm form,
399 uint64 data);
400 void ProcessAttributeSigned(enum DwarfAttribute attr,
401 enum DwarfForm form,
402 int64 data);
403 void ProcessAttributeReference(enum DwarfAttribute attr,
404 enum DwarfForm form,
405 uint64 data);
406
407 bool EndAttributes();
408 void Finish();
409
410 private:
411 // The fully-qualified name, as derived from name_attribute_,
412 // specification_, parent_context_. Computed in EndAttributes.
413 string name_;
414 uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
415 DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
416 const AbstractOrigin* abstract_origin_;
417 bool inline_;
418 };
419
420 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
421 enum DwarfAttribute attr,
422 enum DwarfForm form,
423 uint64 data) {
424 switch (attr) {
425 // If this attribute is present at all --- even if its value is
426 // DW_INL_not_inlined --- then GCC may cite it as someone else's
427 // DW_AT_abstract_origin attribute.
428 case dwarf2reader::DW_AT_inline: inline_ = true; break;
429
430 case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break;
431 case dwarf2reader::DW_AT_high_pc:
432 high_pc_form_ = form;
433 high_pc_ = data;
434 break;
435
436 default:
437 GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
438 break;
439 }
440 }
441
442 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
443 enum DwarfAttribute attr,
444 enum DwarfForm form,
445 int64 data) {
446 switch (attr) {
447 // If this attribute is present at all --- even if its value is
448 // DW_INL_not_inlined --- then GCC may cite it as someone else's
449 // DW_AT_abstract_origin attribute.
450 case dwarf2reader::DW_AT_inline: inline_ = true; break;
451
452 default:
453 break;
454 }
455 }
456
457 void DwarfCUToModule::FuncHandler::ProcessAttributeReference(
458 enum DwarfAttribute attr,
459 enum DwarfForm form,
460 uint64 data) {
461 switch(attr) {
462 case dwarf2reader::DW_AT_abstract_origin: {
463 const AbstractOriginByOffset& origins =
464 cu_context_->file_context->file_private->origins;
465 AbstractOriginByOffset::const_iterator origin = origins.find(data);
466 if (origin != origins.end()) {
467 abstract_origin_ = &(origin->second);
468 } else {
469 cu_context_->reporter->UnknownAbstractOrigin(offset_, data);
470 }
471 break;
472 }
473 default:
474 GenericDIEHandler::ProcessAttributeReference(attr, form, data);
475 break;
476 }
477 }
478
479 bool DwarfCUToModule::FuncHandler::EndAttributes() {
480 // Compute our name, and record a specification, if appropriate.
481 name_ = ComputeQualifiedName();
482 if (name_.empty() && abstract_origin_) {
483 name_ = abstract_origin_->name;
484 }
485 return true;
486 }
487
488 void DwarfCUToModule::FuncHandler::Finish() {
489 // Make high_pc_ an address, if it isn't already.
490 if (high_pc_form_ != dwarf2reader::DW_FORM_addr) {
491 high_pc_ += low_pc_;
492 }
493
494 // Did we collect the information we need? Not all DWARF function
495 // entries have low and high addresses (for example, inlined
496 // functions that were never used), but all the ones we're
497 // interested in cover a non-empty range of bytes.
498 if (low_pc_ < high_pc_) {
499 // Create a Module::Function based on the data we've gathered, and
500 // add it to the functions_ list.
501 Module::Function *func = new Module::Function;
502 // Malformed DWARF may omit the name, but all Module::Functions must
503 // have names.
504 if (!name_.empty()) {
505 func->name = name_;
506 } else {
507 cu_context_->reporter->UnnamedFunction(offset_);
508 func->name = "<name omitted>";
509 }
510 func->address = low_pc_;
511 func->size = high_pc_ - low_pc_;
512 func->parameter_size = 0;
513 if (func->address) {
514 // If the function address is zero this is a sign that this function
515 // description is just empty debug data and should just be discarded.
516 cu_context_->functions.push_back(func);
517 }
518 } else if (inline_) {
519 AbstractOrigin origin(name_);
520 cu_context_->file_context->file_private->origins[offset_] = origin;
521 }
522 }
523
524 // A handler for DIEs that contain functions and contribute a
525 // component to their names: namespaces, classes, etc.
526 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
527 public:
528 NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context,
529 uint64 offset)
530 : GenericDIEHandler(cu_context, parent_context, offset) { }
531 bool EndAttributes();
532 DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag);
533
534 private:
535 DIEContext child_context_; // A context for our children.
536 };
537
538 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
539 child_context_.name = ComputeQualifiedName();
540 return true;
541 }
542
543 dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler(
544 uint64 offset,
545 enum DwarfTag tag) {
546 switch (tag) {
547 case dwarf2reader::DW_TAG_subprogram:
548 return new FuncHandler(cu_context_, &child_context_, offset);
549 case dwarf2reader::DW_TAG_namespace:
550 case dwarf2reader::DW_TAG_class_type:
551 case dwarf2reader::DW_TAG_structure_type:
552 case dwarf2reader::DW_TAG_union_type:
553 return new NamedScopeHandler(cu_context_, &child_context_, offset);
554 default:
555 return NULL;
556 }
557 }
558
559 void DwarfCUToModule::WarningReporter::CUHeading() {
560 if (printed_cu_header_)
561 return;
562 BPLOG(INFO)
563 << filename_ << ": in compilation unit '" << cu_name_
564 << "' (offset 0x" << std::setbase(16) << cu_offset_ << std::setbase(10)
565 << "):";
566 printed_cu_header_ = true;
567 }
568
569 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset,
570 uint64 target) {
571 CUHeading();
572 BPLOG(INFO)
573 << filename_ << ": the DIE at offset 0x"
574 << std::setbase(16) << offset << std::setbase(10)
575 << " has a DW_AT_specification attribute referring to the die at offset 0x"
576 << std::setbase(16) << target << std::setbase(10)
577 << ", which either was not marked as a declaration, or comes "
578 << "later in the file";
579 }
580
581 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset,
582 uint64 target) {
583 CUHeading();
584 BPLOG(INFO)
585 << filename_ << ": the DIE at offset 0x"
586 << std::setbase(16) << offset << std::setbase(10)
587 << " has a DW_AT_abstract_origin attribute referring to the die at"
588 << " offset 0x" << std::setbase(16) << target << std::setbase(10)
589 << ", which either was not marked as an inline, or comes "
590 << "later in the file";
591 }
592
593 void DwarfCUToModule::WarningReporter::MissingSection(const string &name) {
594 CUHeading();
595 BPLOG(INFO) << filename_ << ": warning: couldn't find DWARF '"
596 << name << "' section";
597 }
598
599 void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) {
600 CUHeading();
601 BPLOG(INFO) << filename_ << ": warning: line number data offset beyond "
602 << "end of '.debug_line' section";
603 }
604
605 void DwarfCUToModule::WarningReporter::UncoveredHeading() {
606 if (printed_unpaired_header_)
607 return;
608 CUHeading();
609 BPLOG(INFO) << filename_ << ": warning: skipping unpaired lines/functions:";
610 printed_unpaired_header_ = true;
611 }
612
613 void DwarfCUToModule::WarningReporter::UncoveredFunction(
614 const Module::Function &function) {
615 if (!uncovered_warnings_enabled_)
616 return;
617 UncoveredHeading();
618 BPLOG(INFO) << " function" << (function.size == 0 ? " (zero-length)" : "")
619 << ": " << function.name;
620 }
621
622 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) {
623 if (!uncovered_warnings_enabled_)
624 return;
625 UncoveredHeading();
626 BPLOG(INFO) << " line" << (line.size == 0 ? " (zero-length)" : "")
627 << ": " << line.file->name << ":" << line.number
628 << " at 0x" << std::setbase(16) << line.address << std::setbase(10);
629 }
630
631 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) {
632 CUHeading();
633 BPLOG(INFO) << filename_ << ": warning: function at offset 0x"
634 << std::setbase(16) << offset << std::setbase(10) << " has no name";
635 }
636
637 DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
638 LineToModuleHandler *line_reader,
639 WarningReporter *reporter)
640 : line_reader_(line_reader), has_source_line_info_(false) {
641 cu_context_ = new CUContext(file_context, reporter);
642 child_context_ = new DIEContext();
643 }
644
645 DwarfCUToModule::~DwarfCUToModule() {
646 delete cu_context_;
647 delete child_context_;
648 }
649
650 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
651 enum DwarfForm form,
652 int64 data) {
653 switch (attr) {
654 case dwarf2reader::DW_AT_language: // source language of this CU
655 SetLanguage(static_cast<DwarfLanguage>(data));
656 break;
657 default:
658 break;
659 }
660 }
661
662 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
663 enum DwarfForm form,
664 uint64 data) {
665 switch (attr) {
666 case dwarf2reader::DW_AT_stmt_list: // Line number information.
667 has_source_line_info_ = true;
668 source_line_offset_ = data;
669 break;
670 case dwarf2reader::DW_AT_language: // source language of this CU
671 SetLanguage(static_cast<DwarfLanguage>(data));
672 break;
673 default:
674 break;
675 }
676 }
677
678 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
679 enum DwarfForm form,
680 const string &data) {
681 switch (attr) {
682 case dwarf2reader::DW_AT_name:
683 cu_context_->reporter->SetCUName(data);
684 break;
685 case dwarf2reader::DW_AT_comp_dir:
686 line_reader_->StartCompilationUnit(data);
687 break;
688 default:
689 break;
690 }
691 }
692
693 bool DwarfCUToModule::EndAttributes() {
694 return true;
695 }
696
697 dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler(
698 uint64 offset,
699 enum DwarfTag tag) {
700 switch (tag) {
701 case dwarf2reader::DW_TAG_subprogram:
702 return new FuncHandler(cu_context_, child_context_, offset);
703 case dwarf2reader::DW_TAG_namespace:
704 case dwarf2reader::DW_TAG_class_type:
705 case dwarf2reader::DW_TAG_structure_type:
706 case dwarf2reader::DW_TAG_union_type:
707 return new NamedScopeHandler(cu_context_, child_context_, offset);
708 default:
709 return NULL;
710 }
711 }
712
713 void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
714 switch (language) {
715 case dwarf2reader::DW_LANG_Java:
716 cu_context_->language = Language::Java;
717 break;
718
719 // DWARF has no generic language code for assembly language; this is
720 // what the GNU toolchain uses.
721 case dwarf2reader::DW_LANG_Mips_Assembler:
722 cu_context_->language = Language::Assembler;
723 break;
724
725 // C++ covers so many cases that it probably has some way to cope
726 // with whatever the other languages throw at us. So make it the
727 // default.
728 //
729 // Objective C and Objective C++ seem to create entries for
730 // methods whose DW_AT_name values are already fully-qualified:
731 // "-[Classname method:]". These appear at the top level.
732 //
733 // DWARF data for C should never include namespaces or functions
734 // nested in struct types, but if it ever does, then C++'s
735 // notation is probably not a bad choice for that.
736 default:
737 case dwarf2reader::DW_LANG_ObjC:
738 case dwarf2reader::DW_LANG_ObjC_plus_plus:
739 case dwarf2reader::DW_LANG_C:
740 case dwarf2reader::DW_LANG_C89:
741 case dwarf2reader::DW_LANG_C99:
742 case dwarf2reader::DW_LANG_C_plus_plus:
743 cu_context_->language = Language::CPlusPlus;
744 break;
745 }
746 }
747
748 void DwarfCUToModule::ReadSourceLines(uint64 offset) {
749 const dwarf2reader::SectionMap &section_map
750 = cu_context_->file_context->section_map;
751 dwarf2reader::SectionMap::const_iterator map_entry
752 = section_map.find(".debug_line");
753 // Mac OS X puts DWARF data in sections whose names begin with "__"
754 // instead of ".".
755 if (map_entry == section_map.end())
756 map_entry = section_map.find("__debug_line");
757 if (map_entry == section_map.end()) {
758 cu_context_->reporter->MissingSection(".debug_line");
759 return;
760 }
761 const char *section_start = map_entry->second.first;
762 uint64 section_length = map_entry->second.second;
763 if (offset >= section_length) {
764 cu_context_->reporter->BadLineInfoOffset(offset);
765 return;
766 }
767 line_reader_->ReadProgram(section_start + offset, section_length - offset,
768 cu_context_->file_context->module, &lines_);
769 }
770
771 namespace {
772 // Return true if ADDRESS falls within the range of ITEM.
773 template <class T>
774 inline bool within(const T &item, Module::Address address) {
775 // Because Module::Address is unsigned, and unsigned arithmetic
776 // wraps around, this will be false if ADDRESS falls before the
777 // start of ITEM, or if it falls after ITEM's end.
778 return address - item.address < item.size;
779 }
780 }
781
782 void DwarfCUToModule::AssignLinesToFunctions() {
783 vector<Module::Function *> *functions = &cu_context_->functions;
784 WarningReporter *reporter = cu_context_->reporter;
785
786 // This would be simpler if we assumed that source line entries
787 // don't cross function boundaries. However, there's no real reason
788 // to assume that (say) a series of function definitions on the same
789 // line wouldn't get coalesced into one line number entry. The
790 // DWARF spec certainly makes no such promises.
791 //
792 // So treat the functions and lines as peers, and take the trouble
793 // to compute their ranges' intersections precisely. In any case,
794 // the hair here is a constant factor for performance; the
795 // complexity from here on out is linear.
796
797 // Put both our functions and lines in order by address.
798 std::sort(functions->begin(), functions->end(),
799 Module::Function::CompareByAddress);
800 std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
801
802 // The last line that we used any piece of. We use this only for
803 // generating warnings.
804 const Module::Line *last_line_used = NULL;
805
806 // The last function and line we warned about --- so we can avoid
807 // doing so more than once.
808 const Module::Function *last_function_cited = NULL;
809 const Module::Line *last_line_cited = NULL;
810
811 // Make a single pass through both vectors from lower to higher
812 // addresses, populating each Function's lines vector with lines
813 // from our lines_ vector that fall within the function's address
814 // range.
815 vector<Module::Function *>::iterator func_it = functions->begin();
816 vector<Module::Line>::const_iterator line_it = lines_.begin();
817
818 Module::Address current;
819
820 // Pointers to the referents of func_it and line_it, or NULL if the
821 // iterator is at the end of the sequence.
822 Module::Function *func;
823 const Module::Line *line;
824
825 // Start current at the beginning of the first line or function,
826 // whichever is earlier.
827 if (func_it != functions->end() && line_it != lines_.end()) {
828 func = *func_it;
829 line = &*line_it;
830 current = std::min(func->address, line->address);
831 } else if (line_it != lines_.end()) {
832 func = NULL;
833 line = &*line_it;
834 current = line->address;
835 } else if (func_it != functions->end()) {
836 func = *func_it;
837 line = NULL;
838 current = (*func_it)->address;
839 } else {
840 return;
841 }
842
843 while (func || line) {
844 // This loop has two invariants that hold at the top.
845 //
846 // First, at least one of the iterators is not at the end of its
847 // sequence, and those that are not refer to the earliest
848 // function or line that contains or starts after CURRENT.
849 //
850 // Note that every byte is in one of four states: it is covered
851 // or not covered by a function, and, independently, it is
852 // covered or not covered by a line.
853 //
854 // The second invariant is that CURRENT refers to a byte whose
855 // state is different from its predecessor, or it refers to the
856 // first byte in the address space. In other words, CURRENT is
857 // always the address of a transition.
858 //
859 // Note that, although each iteration advances CURRENT from one
860 // transition address to the next in each iteration, it might
861 // not advance the iterators. Suppose we have a function that
862 // starts with a line, has a gap, and then a second line, and
863 // suppose that we enter an iteration with CURRENT at the end of
864 // the first line. The next transition address is the start of
865 // the second line, after the gap, so the iteration should
866 // advance CURRENT to that point. At the head of that iteration,
867 // the invariants require that the line iterator be pointing at
868 // the second line. But this is also true at the head of the
869 // next. And clearly, the iteration must not change the function
870 // iterator. So neither iterator moves.
871
872 // Assert the first invariant (see above).
873 assert(!func || current < func->address || within(*func, current));
874 assert(!line || current < line->address || within(*line, current));
875
876 // The next transition after CURRENT.
877 Module::Address next_transition;
878
879 // Figure out which state we're in, add lines or warn, and compute
880 // the next transition address.
881 if (func && current >= func->address) {
882 if (line && current >= line->address) {
883 // Covered by both a line and a function.
884 Module::Address func_left = func->size - (current - func->address);
885 Module::Address line_left = line->size - (current - line->address);
886 // This may overflow, but things work out.
887 next_transition = current + std::min(func_left, line_left);
888 Module::Line l = *line;
889 l.address = current;
890 l.size = next_transition - current;
891 func->lines.push_back(l);
892 last_line_used = line;
893 } else {
894 // Covered by a function, but no line.
895 if (func != last_function_cited) {
896 reporter->UncoveredFunction(*func);
897 last_function_cited = func;
898 }
899 if (line && within(*func, line->address))
900 next_transition = line->address;
901 else
902 // If this overflows, we'll catch it below.
903 next_transition = func->address + func->size;
904 }
905 } else {
906 if (line && current >= line->address) {
907 // Covered by a line, but no function.
908 //
909 // If GCC emits padding after one function to align the start
910 // of the next, then it will attribute the padding
911 // instructions to the last source line of function (to reduce
912 // the size of the line number info), but omit it from the
913 // DW_AT_{low,high}_pc range given in .debug_info (since it
914 // costs nothing to be precise there). If we did use at least
915 // some of the line we're about to skip, and it ends at the
916 // start of the next function, then assume this is what
917 // happened, and don't warn.
918 if (line != last_line_cited
919 && !(func
920 && line == last_line_used
921 && func->address - line->address == line->size)) {
922 reporter->UncoveredLine(*line);
923 last_line_cited = line;
924 }
925 if (func && within(*line, func->address))
926 next_transition = func->address;
927 else
928 // If this overflows, we'll catch it below.
929 next_transition = line->address + line->size;
930 } else {
931 // Covered by neither a function nor a line. By the invariant,
932 // both func and line begin after CURRENT. The next transition
933 // is the start of the next function or next line, whichever
934 // is earliest.
935 assert (func || line);
936 if (func && line)
937 next_transition = std::min(func->address, line->address);
938 else if (func)
939 next_transition = func->address;
940 else
941 next_transition = line->address;
942 }
943 }
944
945 // If a function or line abuts the end of the address space, then
946 // next_transition may end up being zero, in which case we've completed
947 // our pass. Handle that here, instead of trying to deal with it in
948 // each place we compute next_transition.
949 if (!next_transition)
950 break;
951
952 // Advance iterators as needed. If lines overlap or functions overlap,
953 // then we could go around more than once. We don't worry too much
954 // about what result we produce in that case, just as long as we don't
955 // hang or crash.
956 while (func_it != functions->end()
957 && next_transition >= (*func_it)->address
958 && !within(**func_it, next_transition))
959 func_it++;
960 func = (func_it != functions->end()) ? *func_it : NULL;
961 while (line_it != lines_.end()
962 && next_transition >= line_it->address
963 && !within(*line_it, next_transition))
964 line_it++;
965 line = (line_it != lines_.end()) ? &*line_it : NULL;
966
967 // We must make progress.
968 assert(next_transition > current);
969 current = next_transition;
970 }
971 }
972
973 void DwarfCUToModule::Finish() {
974 // Assembly language files have no function data, and that gives us
975 // no place to store our line numbers (even though the GNU toolchain
976 // will happily produce source line info for assembly language
977 // files). To avoid spurious warnings about lines we can't assign
978 // to functions, skip CUs in languages that lack functions.
979 if (!cu_context_->language->HasFunctions())
980 return;
981
982 // Read source line info, if we have any.
983 if (has_source_line_info_)
984 ReadSourceLines(source_line_offset_);
985
986 vector<Module::Function *> *functions = &cu_context_->functions;
987
988 // Dole out lines to the appropriate functions.
989 AssignLinesToFunctions();
990
991 // Add our functions, which now have source lines assigned to them,
992 // to module_.
993 cu_context_->file_context->module->AddFunctions(functions->begin(),
994 functions->end());
995
996 // Ownership of the function objects has shifted from cu_context to
997 // the Module.
998 functions->clear();
999 }
1000
1001 bool DwarfCUToModule::StartCompilationUnit(uint64 offset,
1002 uint8 address_size,
1003 uint8 offset_size,
1004 uint64 cu_length,
1005 uint8 dwarf_version) {
1006 return dwarf_version >= 2;
1007 }
1008
1009 bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) {
1010 // We don't deal with partial compilation units (the only other tag
1011 // likely to be used for root DIE).
1012 return tag == dwarf2reader::DW_TAG_compile_unit;
1013 }
1014
1015 } // namespace google_breakpad

mercurial