1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/dwarf_cu_to_module.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1015 @@ 1.4 +// Copyright (c) 2010 Google Inc. 1.5 +// All rights reserved. 1.6 +// 1.7 +// Redistribution and use in source and binary forms, with or without 1.8 +// modification, are permitted provided that the following conditions are 1.9 +// met: 1.10 +// 1.11 +// * Redistributions of source code must retain the above copyright 1.12 +// notice, this list of conditions and the following disclaimer. 1.13 +// * Redistributions in binary form must reproduce the above 1.14 +// copyright notice, this list of conditions and the following disclaimer 1.15 +// in the documentation and/or other materials provided with the 1.16 +// distribution. 1.17 +// * Neither the name of Google Inc. nor the names of its 1.18 +// contributors may be used to endorse or promote products derived from 1.19 +// this software without specific prior written permission. 1.20 +// 1.21 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.22 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.23 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.24 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.25 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.26 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.27 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.28 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.29 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.30 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.31 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.32 + 1.33 +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.34 + 1.35 +// Implement the DwarfCUToModule class; see dwarf_cu_to_module.h. 1.36 + 1.37 +// For <inttypes.h> PRI* macros, before anything else might #include it. 1.38 +#ifndef __STDC_FORMAT_MACROS 1.39 +#define __STDC_FORMAT_MACROS 1.40 +#endif /* __STDC_FORMAT_MACROS */ 1.41 + 1.42 +#include "common/dwarf_cu_to_module.h" 1.43 + 1.44 +#include <assert.h> 1.45 +#if !defined(__ANDROID__) 1.46 +#include <cxxabi.h> 1.47 +#endif 1.48 +#include <inttypes.h> 1.49 + 1.50 +#include <algorithm> 1.51 +#include <set> 1.52 +#include <utility> 1.53 +#include <iomanip> 1.54 + 1.55 +#include "common/dwarf_line_to_module.h" 1.56 +#include "common/logging.h" 1.57 + 1.58 +namespace google_breakpad { 1.59 + 1.60 +using std::map; 1.61 +using std::pair; 1.62 +using std::set; 1.63 +using std::sort; 1.64 +using std::vector; 1.65 + 1.66 +// Data provided by a DWARF specification DIE. 1.67 +// 1.68 +// In DWARF, the DIE for a definition may contain a DW_AT_specification 1.69 +// attribute giving the offset of the corresponding declaration DIE, and 1.70 +// the definition DIE may omit information given in the declaration. For 1.71 +// example, it's common for a function's address range to appear only in 1.72 +// its definition DIE, but its name to appear only in its declaration 1.73 +// DIE. 1.74 +// 1.75 +// The dumper needs to be able to follow DW_AT_specification links to 1.76 +// bring all this information together in a FUNC record. Conveniently, 1.77 +// DIEs that are the target of such links have a DW_AT_declaration flag 1.78 +// set, so we can identify them when we first see them, and record their 1.79 +// contents for later reference. 1.80 +// 1.81 +// A Specification holds information gathered from a declaration DIE that 1.82 +// we may need if we find a DW_AT_specification link pointing to it. 1.83 +struct DwarfCUToModule::Specification { 1.84 + // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name. 1.85 + string qualified_name; 1.86 + 1.87 + // The name of the enclosing scope, or the empty string if there is none. 1.88 + string enclosing_name; 1.89 + 1.90 + // The name for the specification DIE itself, without any enclosing 1.91 + // name components. 1.92 + string unqualified_name; 1.93 +}; 1.94 + 1.95 +// An abstract origin -- base definition of an inline function. 1.96 +struct AbstractOrigin { 1.97 + AbstractOrigin() : name() {} 1.98 + AbstractOrigin(const string& name) : name(name) {} 1.99 + 1.100 + string name; 1.101 +}; 1.102 + 1.103 +typedef map<uint64, AbstractOrigin> AbstractOriginByOffset; 1.104 + 1.105 +// Data global to the DWARF-bearing file that is private to the 1.106 +// DWARF-to-Module process. 1.107 +struct DwarfCUToModule::FilePrivate { 1.108 + // A set of strings used in this CU. Before storing a string in one of 1.109 + // our data structures, insert it into this set, and then use the string 1.110 + // from the set. 1.111 + // 1.112 + // In some STL implementations, strings are reference-counted internally, 1.113 + // meaning that simply using strings from this set, even if passed by 1.114 + // value, assigned, or held directly in structures and containers 1.115 + // (map<string, ...>, for example), causes those strings to share a 1.116 + // single instance of each distinct piece of text. GNU's libstdc++ uses 1.117 + // reference counts, and I believe MSVC did as well, at some point. 1.118 + // However, C++ '11 implementations are moving away from reference 1.119 + // counting. 1.120 + // 1.121 + // In other implementations, string assignments copy the string's text, 1.122 + // so this set will actually hold yet another copy of the string (although 1.123 + // everything will still work). To improve memory consumption portably, 1.124 + // we will probably need to use pointers to strings held in this set. 1.125 + set<string> common_strings; 1.126 + 1.127 + // A map from offsets of DIEs within the .debug_info section to 1.128 + // Specifications describing those DIEs. Specification references can 1.129 + // cross compilation unit boundaries. 1.130 + SpecificationByOffset specifications; 1.131 + 1.132 + AbstractOriginByOffset origins; 1.133 +}; 1.134 + 1.135 +DwarfCUToModule::FileContext::FileContext(const string &filename_arg, 1.136 + Module *module_arg) 1.137 + : filename(filename_arg), module(module_arg) { 1.138 + file_private = new FilePrivate(); 1.139 +} 1.140 + 1.141 +DwarfCUToModule::FileContext::~FileContext() { 1.142 + delete file_private; 1.143 +} 1.144 + 1.145 +// Information global to the particular compilation unit we're 1.146 +// parsing. This is for data shared across the CU's entire DIE tree, 1.147 +// and parameters from the code invoking the CU parser. 1.148 +struct DwarfCUToModule::CUContext { 1.149 + CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg) 1.150 + : file_context(file_context_arg), 1.151 + reporter(reporter_arg), 1.152 + language(Language::CPlusPlus) { } 1.153 + ~CUContext() { 1.154 + for (vector<Module::Function *>::iterator it = functions.begin(); 1.155 + it != functions.end(); it++) 1.156 + delete *it; 1.157 + }; 1.158 + 1.159 + // The DWARF-bearing file into which this CU was incorporated. 1.160 + FileContext *file_context; 1.161 + 1.162 + // For printing error messages. 1.163 + WarningReporter *reporter; 1.164 + 1.165 + // The source language of this compilation unit. 1.166 + const Language *language; 1.167 + 1.168 + // The functions defined in this compilation unit. We accumulate 1.169 + // them here during parsing. Then, in DwarfCUToModule::Finish, we 1.170 + // assign them lines and add them to file_context->module. 1.171 + // 1.172 + // Destroying this destroys all the functions this vector points to. 1.173 + vector<Module::Function *> functions; 1.174 +}; 1.175 + 1.176 +// Information about the context of a particular DIE. This is for 1.177 +// information that changes as we descend the tree towards the leaves: 1.178 +// the containing classes/namespaces, etc. 1.179 +struct DwarfCUToModule::DIEContext { 1.180 + // The fully-qualified name of the context. For example, for a 1.181 + // tree like: 1.182 + // 1.183 + // DW_TAG_namespace Foo 1.184 + // DW_TAG_class Bar 1.185 + // DW_TAG_subprogram Baz 1.186 + // 1.187 + // in a C++ compilation unit, the DIEContext's name for the 1.188 + // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's 1.189 + // name for the DW_TAG_namespace DIE would be "". 1.190 + string name; 1.191 +}; 1.192 + 1.193 +// An abstract base class for all the dumper's DIE handlers. 1.194 +class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler { 1.195 + public: 1.196 + // Create a handler for the DIE at OFFSET whose compilation unit is 1.197 + // described by CU_CONTEXT, and whose immediate context is described 1.198 + // by PARENT_CONTEXT. 1.199 + GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context, 1.200 + uint64 offset) 1.201 + : cu_context_(cu_context), 1.202 + parent_context_(parent_context), 1.203 + offset_(offset), 1.204 + declaration_(false), 1.205 + specification_(NULL) { } 1.206 + 1.207 + // Derived classes' ProcessAttributeUnsigned can defer to this to 1.208 + // handle DW_AT_declaration, or simply not override it. 1.209 + void ProcessAttributeUnsigned(enum DwarfAttribute attr, 1.210 + enum DwarfForm form, 1.211 + uint64 data); 1.212 + 1.213 + // Derived classes' ProcessAttributeReference can defer to this to 1.214 + // handle DW_AT_specification, or simply not override it. 1.215 + void ProcessAttributeReference(enum DwarfAttribute attr, 1.216 + enum DwarfForm form, 1.217 + uint64 data); 1.218 + 1.219 + // Derived classes' ProcessAttributeReference can defer to this to 1.220 + // handle DW_AT_specification, or simply not override it. 1.221 + void ProcessAttributeString(enum DwarfAttribute attr, 1.222 + enum DwarfForm form, 1.223 + const string &data); 1.224 + 1.225 + protected: 1.226 + // Compute and return the fully-qualified name of the DIE. If this 1.227 + // DIE is a declaration DIE, to be cited by other DIEs' 1.228 + // DW_AT_specification attributes, record its enclosing name and 1.229 + // unqualified name in the specification table. 1.230 + // 1.231 + // Use this from EndAttributes member functions, not ProcessAttribute* 1.232 + // functions; only the former can be sure that all the DIE's attributes 1.233 + // have been seen. 1.234 + string ComputeQualifiedName(); 1.235 + 1.236 + CUContext *cu_context_; 1.237 + DIEContext *parent_context_; 1.238 + uint64 offset_; 1.239 + 1.240 + // Place the name in the global set of strings. Even though this looks 1.241 + // like a copy, all the major std::string implementations use reference 1.242 + // counting internally, so the effect is to have all the data structures 1.243 + // share copies of strings whenever possible. 1.244 + // FIXME: Should this return something like a string_ref to avoid the 1.245 + // assumption about how strings are implemented? 1.246 + string AddStringToPool(const string &str); 1.247 + 1.248 + // If this DIE has a DW_AT_declaration attribute, this is its value. 1.249 + // It is false on DIEs with no DW_AT_declaration attribute. 1.250 + bool declaration_; 1.251 + 1.252 + // If this DIE has a DW_AT_specification attribute, this is the 1.253 + // Specification structure for the DIE the attribute refers to. 1.254 + // Otherwise, this is NULL. 1.255 + Specification *specification_; 1.256 + 1.257 + // The value of the DW_AT_name attribute, or the empty string if the 1.258 + // DIE has no such attribute. 1.259 + string name_attribute_; 1.260 + 1.261 + // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty 1.262 + // string if the DIE has no such attribute or its content could not be 1.263 + // demangled. 1.264 + string demangled_name_; 1.265 +}; 1.266 + 1.267 +void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( 1.268 + enum DwarfAttribute attr, 1.269 + enum DwarfForm form, 1.270 + uint64 data) { 1.271 + switch (attr) { 1.272 + case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break; 1.273 + default: break; 1.274 + } 1.275 +} 1.276 + 1.277 +void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( 1.278 + enum DwarfAttribute attr, 1.279 + enum DwarfForm form, 1.280 + uint64 data) { 1.281 + switch (attr) { 1.282 + case dwarf2reader::DW_AT_specification: { 1.283 + // Find the Specification to which this attribute refers, and 1.284 + // set specification_ appropriately. We could do more processing 1.285 + // here, but it's better to leave the real work to our 1.286 + // EndAttribute member function, at which point we know we have 1.287 + // seen all the DIE's attributes. 1.288 + FileContext *file_context = cu_context_->file_context; 1.289 + SpecificationByOffset *specifications 1.290 + = &file_context->file_private->specifications; 1.291 + SpecificationByOffset::iterator spec = specifications->find(data); 1.292 + if (spec != specifications->end()) { 1.293 + specification_ = &spec->second; 1.294 + } else { 1.295 + // Technically, there's no reason a DW_AT_specification 1.296 + // couldn't be a forward reference, but supporting that would 1.297 + // be a lot of work (changing to a two-pass structure), and I 1.298 + // don't think any producers we care about ever emit such 1.299 + // things. 1.300 + cu_context_->reporter->UnknownSpecification(offset_, data); 1.301 + } 1.302 + break; 1.303 + } 1.304 + default: break; 1.305 + } 1.306 +} 1.307 + 1.308 +string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) { 1.309 + pair<set<string>::iterator, bool> result = 1.310 + cu_context_->file_context->file_private->common_strings.insert(str); 1.311 + return *result.first; 1.312 +} 1.313 + 1.314 +void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( 1.315 + enum DwarfAttribute attr, 1.316 + enum DwarfForm form, 1.317 + const string &data) { 1.318 + switch (attr) { 1.319 + case dwarf2reader::DW_AT_name: 1.320 + name_attribute_ = AddStringToPool(data); 1.321 + break; 1.322 + case dwarf2reader::DW_AT_MIPS_linkage_name: { 1.323 + char* demangled = NULL; 1.324 +#if !defined(__ANDROID__) 1.325 + demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, NULL); 1.326 +#endif 1.327 + if (demangled) { 1.328 + demangled_name_ = AddStringToPool(demangled); 1.329 + free(reinterpret_cast<void*>(demangled)); 1.330 + } 1.331 + break; 1.332 + } 1.333 + default: break; 1.334 + } 1.335 +} 1.336 + 1.337 +string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { 1.338 + // Use the demangled name, if one is available. Demangled names are 1.339 + // preferable to those inferred from the DWARF structure because they 1.340 + // include argument types. 1.341 + const string *qualified_name = NULL; 1.342 + if (!demangled_name_.empty()) { 1.343 + // Found it is this DIE. 1.344 + qualified_name = &demangled_name_; 1.345 + } else if (specification_ && !specification_->qualified_name.empty()) { 1.346 + // Found it on the specification. 1.347 + qualified_name = &specification_->qualified_name; 1.348 + } 1.349 + 1.350 + const string *unqualified_name; 1.351 + const string *enclosing_name; 1.352 + if (!qualified_name) { 1.353 + // Find our unqualified name. If the DIE has its own DW_AT_name 1.354 + // attribute, then use that; otherwise, check our specification. 1.355 + if (name_attribute_.empty() && specification_) 1.356 + unqualified_name = &specification_->unqualified_name; 1.357 + else 1.358 + unqualified_name = &name_attribute_; 1.359 + 1.360 + // Find the name of our enclosing context. If we have a 1.361 + // specification, it's the specification's enclosing context that 1.362 + // counts; otherwise, use this DIE's context. 1.363 + if (specification_) 1.364 + enclosing_name = &specification_->enclosing_name; 1.365 + else 1.366 + enclosing_name = &parent_context_->name; 1.367 + } 1.368 + 1.369 + // If this DIE was marked as a declaration, record its names in the 1.370 + // specification table. 1.371 + if (declaration_) { 1.372 + FileContext *file_context = cu_context_->file_context; 1.373 + Specification spec; 1.374 + if (qualified_name) 1.375 + spec.qualified_name = *qualified_name; 1.376 + else { 1.377 + spec.enclosing_name = *enclosing_name; 1.378 + spec.unqualified_name = *unqualified_name; 1.379 + } 1.380 + file_context->file_private->specifications[offset_] = spec; 1.381 + } 1.382 + 1.383 + if (qualified_name) 1.384 + return *qualified_name; 1.385 + 1.386 + // Combine the enclosing name and unqualified name to produce our 1.387 + // own fully-qualified name. 1.388 + return cu_context_->language->MakeQualifiedName(*enclosing_name, 1.389 + *unqualified_name); 1.390 +} 1.391 + 1.392 +// A handler class for DW_TAG_subprogram DIEs. 1.393 +class DwarfCUToModule::FuncHandler: public GenericDIEHandler { 1.394 + public: 1.395 + FuncHandler(CUContext *cu_context, DIEContext *parent_context, 1.396 + uint64 offset) 1.397 + : GenericDIEHandler(cu_context, parent_context, offset), 1.398 + low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr), 1.399 + abstract_origin_(NULL), inline_(false) { } 1.400 + void ProcessAttributeUnsigned(enum DwarfAttribute attr, 1.401 + enum DwarfForm form, 1.402 + uint64 data); 1.403 + void ProcessAttributeSigned(enum DwarfAttribute attr, 1.404 + enum DwarfForm form, 1.405 + int64 data); 1.406 + void ProcessAttributeReference(enum DwarfAttribute attr, 1.407 + enum DwarfForm form, 1.408 + uint64 data); 1.409 + 1.410 + bool EndAttributes(); 1.411 + void Finish(); 1.412 + 1.413 + private: 1.414 + // The fully-qualified name, as derived from name_attribute_, 1.415 + // specification_, parent_context_. Computed in EndAttributes. 1.416 + string name_; 1.417 + uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc 1.418 + DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. 1.419 + const AbstractOrigin* abstract_origin_; 1.420 + bool inline_; 1.421 +}; 1.422 + 1.423 +void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( 1.424 + enum DwarfAttribute attr, 1.425 + enum DwarfForm form, 1.426 + uint64 data) { 1.427 + switch (attr) { 1.428 + // If this attribute is present at all --- even if its value is 1.429 + // DW_INL_not_inlined --- then GCC may cite it as someone else's 1.430 + // DW_AT_abstract_origin attribute. 1.431 + case dwarf2reader::DW_AT_inline: inline_ = true; break; 1.432 + 1.433 + case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break; 1.434 + case dwarf2reader::DW_AT_high_pc: 1.435 + high_pc_form_ = form; 1.436 + high_pc_ = data; 1.437 + break; 1.438 + 1.439 + default: 1.440 + GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); 1.441 + break; 1.442 + } 1.443 +} 1.444 + 1.445 +void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( 1.446 + enum DwarfAttribute attr, 1.447 + enum DwarfForm form, 1.448 + int64 data) { 1.449 + switch (attr) { 1.450 + // If this attribute is present at all --- even if its value is 1.451 + // DW_INL_not_inlined --- then GCC may cite it as someone else's 1.452 + // DW_AT_abstract_origin attribute. 1.453 + case dwarf2reader::DW_AT_inline: inline_ = true; break; 1.454 + 1.455 + default: 1.456 + break; 1.457 + } 1.458 +} 1.459 + 1.460 +void DwarfCUToModule::FuncHandler::ProcessAttributeReference( 1.461 + enum DwarfAttribute attr, 1.462 + enum DwarfForm form, 1.463 + uint64 data) { 1.464 + switch(attr) { 1.465 + case dwarf2reader::DW_AT_abstract_origin: { 1.466 + const AbstractOriginByOffset& origins = 1.467 + cu_context_->file_context->file_private->origins; 1.468 + AbstractOriginByOffset::const_iterator origin = origins.find(data); 1.469 + if (origin != origins.end()) { 1.470 + abstract_origin_ = &(origin->second); 1.471 + } else { 1.472 + cu_context_->reporter->UnknownAbstractOrigin(offset_, data); 1.473 + } 1.474 + break; 1.475 + } 1.476 + default: 1.477 + GenericDIEHandler::ProcessAttributeReference(attr, form, data); 1.478 + break; 1.479 + } 1.480 +} 1.481 + 1.482 +bool DwarfCUToModule::FuncHandler::EndAttributes() { 1.483 + // Compute our name, and record a specification, if appropriate. 1.484 + name_ = ComputeQualifiedName(); 1.485 + if (name_.empty() && abstract_origin_) { 1.486 + name_ = abstract_origin_->name; 1.487 + } 1.488 + return true; 1.489 +} 1.490 + 1.491 +void DwarfCUToModule::FuncHandler::Finish() { 1.492 + // Make high_pc_ an address, if it isn't already. 1.493 + if (high_pc_form_ != dwarf2reader::DW_FORM_addr) { 1.494 + high_pc_ += low_pc_; 1.495 + } 1.496 + 1.497 + // Did we collect the information we need? Not all DWARF function 1.498 + // entries have low and high addresses (for example, inlined 1.499 + // functions that were never used), but all the ones we're 1.500 + // interested in cover a non-empty range of bytes. 1.501 + if (low_pc_ < high_pc_) { 1.502 + // Create a Module::Function based on the data we've gathered, and 1.503 + // add it to the functions_ list. 1.504 + Module::Function *func = new Module::Function; 1.505 + // Malformed DWARF may omit the name, but all Module::Functions must 1.506 + // have names. 1.507 + if (!name_.empty()) { 1.508 + func->name = name_; 1.509 + } else { 1.510 + cu_context_->reporter->UnnamedFunction(offset_); 1.511 + func->name = "<name omitted>"; 1.512 + } 1.513 + func->address = low_pc_; 1.514 + func->size = high_pc_ - low_pc_; 1.515 + func->parameter_size = 0; 1.516 + if (func->address) { 1.517 + // If the function address is zero this is a sign that this function 1.518 + // description is just empty debug data and should just be discarded. 1.519 + cu_context_->functions.push_back(func); 1.520 + } 1.521 + } else if (inline_) { 1.522 + AbstractOrigin origin(name_); 1.523 + cu_context_->file_context->file_private->origins[offset_] = origin; 1.524 + } 1.525 +} 1.526 + 1.527 +// A handler for DIEs that contain functions and contribute a 1.528 +// component to their names: namespaces, classes, etc. 1.529 +class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { 1.530 + public: 1.531 + NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context, 1.532 + uint64 offset) 1.533 + : GenericDIEHandler(cu_context, parent_context, offset) { } 1.534 + bool EndAttributes(); 1.535 + DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag); 1.536 + 1.537 + private: 1.538 + DIEContext child_context_; // A context for our children. 1.539 +}; 1.540 + 1.541 +bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { 1.542 + child_context_.name = ComputeQualifiedName(); 1.543 + return true; 1.544 +} 1.545 + 1.546 +dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler( 1.547 + uint64 offset, 1.548 + enum DwarfTag tag) { 1.549 + switch (tag) { 1.550 + case dwarf2reader::DW_TAG_subprogram: 1.551 + return new FuncHandler(cu_context_, &child_context_, offset); 1.552 + case dwarf2reader::DW_TAG_namespace: 1.553 + case dwarf2reader::DW_TAG_class_type: 1.554 + case dwarf2reader::DW_TAG_structure_type: 1.555 + case dwarf2reader::DW_TAG_union_type: 1.556 + return new NamedScopeHandler(cu_context_, &child_context_, offset); 1.557 + default: 1.558 + return NULL; 1.559 + } 1.560 +} 1.561 + 1.562 +void DwarfCUToModule::WarningReporter::CUHeading() { 1.563 + if (printed_cu_header_) 1.564 + return; 1.565 + BPLOG(INFO) 1.566 + << filename_ << ": in compilation unit '" << cu_name_ 1.567 + << "' (offset 0x" << std::setbase(16) << cu_offset_ << std::setbase(10) 1.568 + << "):"; 1.569 + printed_cu_header_ = true; 1.570 +} 1.571 + 1.572 +void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset, 1.573 + uint64 target) { 1.574 + CUHeading(); 1.575 + BPLOG(INFO) 1.576 + << filename_ << ": the DIE at offset 0x" 1.577 + << std::setbase(16) << offset << std::setbase(10) 1.578 + << " has a DW_AT_specification attribute referring to the die at offset 0x" 1.579 + << std::setbase(16) << target << std::setbase(10) 1.580 + << ", which either was not marked as a declaration, or comes " 1.581 + << "later in the file"; 1.582 +} 1.583 + 1.584 +void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset, 1.585 + uint64 target) { 1.586 + CUHeading(); 1.587 + BPLOG(INFO) 1.588 + << filename_ << ": the DIE at offset 0x" 1.589 + << std::setbase(16) << offset << std::setbase(10) 1.590 + << " has a DW_AT_abstract_origin attribute referring to the die at" 1.591 + << " offset 0x" << std::setbase(16) << target << std::setbase(10) 1.592 + << ", which either was not marked as an inline, or comes " 1.593 + << "later in the file"; 1.594 +} 1.595 + 1.596 +void DwarfCUToModule::WarningReporter::MissingSection(const string &name) { 1.597 + CUHeading(); 1.598 + BPLOG(INFO) << filename_ << ": warning: couldn't find DWARF '" 1.599 + << name << "' section"; 1.600 +} 1.601 + 1.602 +void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) { 1.603 + CUHeading(); 1.604 + BPLOG(INFO) << filename_ << ": warning: line number data offset beyond " 1.605 + << "end of '.debug_line' section"; 1.606 +} 1.607 + 1.608 +void DwarfCUToModule::WarningReporter::UncoveredHeading() { 1.609 + if (printed_unpaired_header_) 1.610 + return; 1.611 + CUHeading(); 1.612 + BPLOG(INFO) << filename_ << ": warning: skipping unpaired lines/functions:"; 1.613 + printed_unpaired_header_ = true; 1.614 +} 1.615 + 1.616 +void DwarfCUToModule::WarningReporter::UncoveredFunction( 1.617 + const Module::Function &function) { 1.618 + if (!uncovered_warnings_enabled_) 1.619 + return; 1.620 + UncoveredHeading(); 1.621 + BPLOG(INFO) << " function" << (function.size == 0 ? " (zero-length)" : "") 1.622 + << ": " << function.name; 1.623 +} 1.624 + 1.625 +void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) { 1.626 + if (!uncovered_warnings_enabled_) 1.627 + return; 1.628 + UncoveredHeading(); 1.629 + BPLOG(INFO) << " line" << (line.size == 0 ? " (zero-length)" : "") 1.630 + << ": " << line.file->name << ":" << line.number 1.631 + << " at 0x" << std::setbase(16) << line.address << std::setbase(10); 1.632 +} 1.633 + 1.634 +void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) { 1.635 + CUHeading(); 1.636 + BPLOG(INFO) << filename_ << ": warning: function at offset 0x" 1.637 + << std::setbase(16) << offset << std::setbase(10) << " has no name"; 1.638 +} 1.639 + 1.640 +DwarfCUToModule::DwarfCUToModule(FileContext *file_context, 1.641 + LineToModuleHandler *line_reader, 1.642 + WarningReporter *reporter) 1.643 + : line_reader_(line_reader), has_source_line_info_(false) { 1.644 + cu_context_ = new CUContext(file_context, reporter); 1.645 + child_context_ = new DIEContext(); 1.646 +} 1.647 + 1.648 +DwarfCUToModule::~DwarfCUToModule() { 1.649 + delete cu_context_; 1.650 + delete child_context_; 1.651 +} 1.652 + 1.653 +void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr, 1.654 + enum DwarfForm form, 1.655 + int64 data) { 1.656 + switch (attr) { 1.657 + case dwarf2reader::DW_AT_language: // source language of this CU 1.658 + SetLanguage(static_cast<DwarfLanguage>(data)); 1.659 + break; 1.660 + default: 1.661 + break; 1.662 + } 1.663 +} 1.664 + 1.665 +void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, 1.666 + enum DwarfForm form, 1.667 + uint64 data) { 1.668 + switch (attr) { 1.669 + case dwarf2reader::DW_AT_stmt_list: // Line number information. 1.670 + has_source_line_info_ = true; 1.671 + source_line_offset_ = data; 1.672 + break; 1.673 + case dwarf2reader::DW_AT_language: // source language of this CU 1.674 + SetLanguage(static_cast<DwarfLanguage>(data)); 1.675 + break; 1.676 + default: 1.677 + break; 1.678 + } 1.679 +} 1.680 + 1.681 +void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, 1.682 + enum DwarfForm form, 1.683 + const string &data) { 1.684 + switch (attr) { 1.685 + case dwarf2reader::DW_AT_name: 1.686 + cu_context_->reporter->SetCUName(data); 1.687 + break; 1.688 + case dwarf2reader::DW_AT_comp_dir: 1.689 + line_reader_->StartCompilationUnit(data); 1.690 + break; 1.691 + default: 1.692 + break; 1.693 + } 1.694 +} 1.695 + 1.696 +bool DwarfCUToModule::EndAttributes() { 1.697 + return true; 1.698 +} 1.699 + 1.700 +dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler( 1.701 + uint64 offset, 1.702 + enum DwarfTag tag) { 1.703 + switch (tag) { 1.704 + case dwarf2reader::DW_TAG_subprogram: 1.705 + return new FuncHandler(cu_context_, child_context_, offset); 1.706 + case dwarf2reader::DW_TAG_namespace: 1.707 + case dwarf2reader::DW_TAG_class_type: 1.708 + case dwarf2reader::DW_TAG_structure_type: 1.709 + case dwarf2reader::DW_TAG_union_type: 1.710 + return new NamedScopeHandler(cu_context_, child_context_, offset); 1.711 + default: 1.712 + return NULL; 1.713 + } 1.714 +} 1.715 + 1.716 +void DwarfCUToModule::SetLanguage(DwarfLanguage language) { 1.717 + switch (language) { 1.718 + case dwarf2reader::DW_LANG_Java: 1.719 + cu_context_->language = Language::Java; 1.720 + break; 1.721 + 1.722 + // DWARF has no generic language code for assembly language; this is 1.723 + // what the GNU toolchain uses. 1.724 + case dwarf2reader::DW_LANG_Mips_Assembler: 1.725 + cu_context_->language = Language::Assembler; 1.726 + break; 1.727 + 1.728 + // C++ covers so many cases that it probably has some way to cope 1.729 + // with whatever the other languages throw at us. So make it the 1.730 + // default. 1.731 + // 1.732 + // Objective C and Objective C++ seem to create entries for 1.733 + // methods whose DW_AT_name values are already fully-qualified: 1.734 + // "-[Classname method:]". These appear at the top level. 1.735 + // 1.736 + // DWARF data for C should never include namespaces or functions 1.737 + // nested in struct types, but if it ever does, then C++'s 1.738 + // notation is probably not a bad choice for that. 1.739 + default: 1.740 + case dwarf2reader::DW_LANG_ObjC: 1.741 + case dwarf2reader::DW_LANG_ObjC_plus_plus: 1.742 + case dwarf2reader::DW_LANG_C: 1.743 + case dwarf2reader::DW_LANG_C89: 1.744 + case dwarf2reader::DW_LANG_C99: 1.745 + case dwarf2reader::DW_LANG_C_plus_plus: 1.746 + cu_context_->language = Language::CPlusPlus; 1.747 + break; 1.748 + } 1.749 +} 1.750 + 1.751 +void DwarfCUToModule::ReadSourceLines(uint64 offset) { 1.752 + const dwarf2reader::SectionMap §ion_map 1.753 + = cu_context_->file_context->section_map; 1.754 + dwarf2reader::SectionMap::const_iterator map_entry 1.755 + = section_map.find(".debug_line"); 1.756 + // Mac OS X puts DWARF data in sections whose names begin with "__" 1.757 + // instead of ".". 1.758 + if (map_entry == section_map.end()) 1.759 + map_entry = section_map.find("__debug_line"); 1.760 + if (map_entry == section_map.end()) { 1.761 + cu_context_->reporter->MissingSection(".debug_line"); 1.762 + return; 1.763 + } 1.764 + const char *section_start = map_entry->second.first; 1.765 + uint64 section_length = map_entry->second.second; 1.766 + if (offset >= section_length) { 1.767 + cu_context_->reporter->BadLineInfoOffset(offset); 1.768 + return; 1.769 + } 1.770 + line_reader_->ReadProgram(section_start + offset, section_length - offset, 1.771 + cu_context_->file_context->module, &lines_); 1.772 +} 1.773 + 1.774 +namespace { 1.775 +// Return true if ADDRESS falls within the range of ITEM. 1.776 +template <class T> 1.777 +inline bool within(const T &item, Module::Address address) { 1.778 + // Because Module::Address is unsigned, and unsigned arithmetic 1.779 + // wraps around, this will be false if ADDRESS falls before the 1.780 + // start of ITEM, or if it falls after ITEM's end. 1.781 + return address - item.address < item.size; 1.782 +} 1.783 +} 1.784 + 1.785 +void DwarfCUToModule::AssignLinesToFunctions() { 1.786 + vector<Module::Function *> *functions = &cu_context_->functions; 1.787 + WarningReporter *reporter = cu_context_->reporter; 1.788 + 1.789 + // This would be simpler if we assumed that source line entries 1.790 + // don't cross function boundaries. However, there's no real reason 1.791 + // to assume that (say) a series of function definitions on the same 1.792 + // line wouldn't get coalesced into one line number entry. The 1.793 + // DWARF spec certainly makes no such promises. 1.794 + // 1.795 + // So treat the functions and lines as peers, and take the trouble 1.796 + // to compute their ranges' intersections precisely. In any case, 1.797 + // the hair here is a constant factor for performance; the 1.798 + // complexity from here on out is linear. 1.799 + 1.800 + // Put both our functions and lines in order by address. 1.801 + std::sort(functions->begin(), functions->end(), 1.802 + Module::Function::CompareByAddress); 1.803 + std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress); 1.804 + 1.805 + // The last line that we used any piece of. We use this only for 1.806 + // generating warnings. 1.807 + const Module::Line *last_line_used = NULL; 1.808 + 1.809 + // The last function and line we warned about --- so we can avoid 1.810 + // doing so more than once. 1.811 + const Module::Function *last_function_cited = NULL; 1.812 + const Module::Line *last_line_cited = NULL; 1.813 + 1.814 + // Make a single pass through both vectors from lower to higher 1.815 + // addresses, populating each Function's lines vector with lines 1.816 + // from our lines_ vector that fall within the function's address 1.817 + // range. 1.818 + vector<Module::Function *>::iterator func_it = functions->begin(); 1.819 + vector<Module::Line>::const_iterator line_it = lines_.begin(); 1.820 + 1.821 + Module::Address current; 1.822 + 1.823 + // Pointers to the referents of func_it and line_it, or NULL if the 1.824 + // iterator is at the end of the sequence. 1.825 + Module::Function *func; 1.826 + const Module::Line *line; 1.827 + 1.828 + // Start current at the beginning of the first line or function, 1.829 + // whichever is earlier. 1.830 + if (func_it != functions->end() && line_it != lines_.end()) { 1.831 + func = *func_it; 1.832 + line = &*line_it; 1.833 + current = std::min(func->address, line->address); 1.834 + } else if (line_it != lines_.end()) { 1.835 + func = NULL; 1.836 + line = &*line_it; 1.837 + current = line->address; 1.838 + } else if (func_it != functions->end()) { 1.839 + func = *func_it; 1.840 + line = NULL; 1.841 + current = (*func_it)->address; 1.842 + } else { 1.843 + return; 1.844 + } 1.845 + 1.846 + while (func || line) { 1.847 + // This loop has two invariants that hold at the top. 1.848 + // 1.849 + // First, at least one of the iterators is not at the end of its 1.850 + // sequence, and those that are not refer to the earliest 1.851 + // function or line that contains or starts after CURRENT. 1.852 + // 1.853 + // Note that every byte is in one of four states: it is covered 1.854 + // or not covered by a function, and, independently, it is 1.855 + // covered or not covered by a line. 1.856 + // 1.857 + // The second invariant is that CURRENT refers to a byte whose 1.858 + // state is different from its predecessor, or it refers to the 1.859 + // first byte in the address space. In other words, CURRENT is 1.860 + // always the address of a transition. 1.861 + // 1.862 + // Note that, although each iteration advances CURRENT from one 1.863 + // transition address to the next in each iteration, it might 1.864 + // not advance the iterators. Suppose we have a function that 1.865 + // starts with a line, has a gap, and then a second line, and 1.866 + // suppose that we enter an iteration with CURRENT at the end of 1.867 + // the first line. The next transition address is the start of 1.868 + // the second line, after the gap, so the iteration should 1.869 + // advance CURRENT to that point. At the head of that iteration, 1.870 + // the invariants require that the line iterator be pointing at 1.871 + // the second line. But this is also true at the head of the 1.872 + // next. And clearly, the iteration must not change the function 1.873 + // iterator. So neither iterator moves. 1.874 + 1.875 + // Assert the first invariant (see above). 1.876 + assert(!func || current < func->address || within(*func, current)); 1.877 + assert(!line || current < line->address || within(*line, current)); 1.878 + 1.879 + // The next transition after CURRENT. 1.880 + Module::Address next_transition; 1.881 + 1.882 + // Figure out which state we're in, add lines or warn, and compute 1.883 + // the next transition address. 1.884 + if (func && current >= func->address) { 1.885 + if (line && current >= line->address) { 1.886 + // Covered by both a line and a function. 1.887 + Module::Address func_left = func->size - (current - func->address); 1.888 + Module::Address line_left = line->size - (current - line->address); 1.889 + // This may overflow, but things work out. 1.890 + next_transition = current + std::min(func_left, line_left); 1.891 + Module::Line l = *line; 1.892 + l.address = current; 1.893 + l.size = next_transition - current; 1.894 + func->lines.push_back(l); 1.895 + last_line_used = line; 1.896 + } else { 1.897 + // Covered by a function, but no line. 1.898 + if (func != last_function_cited) { 1.899 + reporter->UncoveredFunction(*func); 1.900 + last_function_cited = func; 1.901 + } 1.902 + if (line && within(*func, line->address)) 1.903 + next_transition = line->address; 1.904 + else 1.905 + // If this overflows, we'll catch it below. 1.906 + next_transition = func->address + func->size; 1.907 + } 1.908 + } else { 1.909 + if (line && current >= line->address) { 1.910 + // Covered by a line, but no function. 1.911 + // 1.912 + // If GCC emits padding after one function to align the start 1.913 + // of the next, then it will attribute the padding 1.914 + // instructions to the last source line of function (to reduce 1.915 + // the size of the line number info), but omit it from the 1.916 + // DW_AT_{low,high}_pc range given in .debug_info (since it 1.917 + // costs nothing to be precise there). If we did use at least 1.918 + // some of the line we're about to skip, and it ends at the 1.919 + // start of the next function, then assume this is what 1.920 + // happened, and don't warn. 1.921 + if (line != last_line_cited 1.922 + && !(func 1.923 + && line == last_line_used 1.924 + && func->address - line->address == line->size)) { 1.925 + reporter->UncoveredLine(*line); 1.926 + last_line_cited = line; 1.927 + } 1.928 + if (func && within(*line, func->address)) 1.929 + next_transition = func->address; 1.930 + else 1.931 + // If this overflows, we'll catch it below. 1.932 + next_transition = line->address + line->size; 1.933 + } else { 1.934 + // Covered by neither a function nor a line. By the invariant, 1.935 + // both func and line begin after CURRENT. The next transition 1.936 + // is the start of the next function or next line, whichever 1.937 + // is earliest. 1.938 + assert (func || line); 1.939 + if (func && line) 1.940 + next_transition = std::min(func->address, line->address); 1.941 + else if (func) 1.942 + next_transition = func->address; 1.943 + else 1.944 + next_transition = line->address; 1.945 + } 1.946 + } 1.947 + 1.948 + // If a function or line abuts the end of the address space, then 1.949 + // next_transition may end up being zero, in which case we've completed 1.950 + // our pass. Handle that here, instead of trying to deal with it in 1.951 + // each place we compute next_transition. 1.952 + if (!next_transition) 1.953 + break; 1.954 + 1.955 + // Advance iterators as needed. If lines overlap or functions overlap, 1.956 + // then we could go around more than once. We don't worry too much 1.957 + // about what result we produce in that case, just as long as we don't 1.958 + // hang or crash. 1.959 + while (func_it != functions->end() 1.960 + && next_transition >= (*func_it)->address 1.961 + && !within(**func_it, next_transition)) 1.962 + func_it++; 1.963 + func = (func_it != functions->end()) ? *func_it : NULL; 1.964 + while (line_it != lines_.end() 1.965 + && next_transition >= line_it->address 1.966 + && !within(*line_it, next_transition)) 1.967 + line_it++; 1.968 + line = (line_it != lines_.end()) ? &*line_it : NULL; 1.969 + 1.970 + // We must make progress. 1.971 + assert(next_transition > current); 1.972 + current = next_transition; 1.973 + } 1.974 +} 1.975 + 1.976 +void DwarfCUToModule::Finish() { 1.977 + // Assembly language files have no function data, and that gives us 1.978 + // no place to store our line numbers (even though the GNU toolchain 1.979 + // will happily produce source line info for assembly language 1.980 + // files). To avoid spurious warnings about lines we can't assign 1.981 + // to functions, skip CUs in languages that lack functions. 1.982 + if (!cu_context_->language->HasFunctions()) 1.983 + return; 1.984 + 1.985 + // Read source line info, if we have any. 1.986 + if (has_source_line_info_) 1.987 + ReadSourceLines(source_line_offset_); 1.988 + 1.989 + vector<Module::Function *> *functions = &cu_context_->functions; 1.990 + 1.991 + // Dole out lines to the appropriate functions. 1.992 + AssignLinesToFunctions(); 1.993 + 1.994 + // Add our functions, which now have source lines assigned to them, 1.995 + // to module_. 1.996 + cu_context_->file_context->module->AddFunctions(functions->begin(), 1.997 + functions->end()); 1.998 + 1.999 + // Ownership of the function objects has shifted from cu_context to 1.1000 + // the Module. 1.1001 + functions->clear(); 1.1002 +} 1.1003 + 1.1004 +bool DwarfCUToModule::StartCompilationUnit(uint64 offset, 1.1005 + uint8 address_size, 1.1006 + uint8 offset_size, 1.1007 + uint64 cu_length, 1.1008 + uint8 dwarf_version) { 1.1009 + return dwarf_version >= 2; 1.1010 +} 1.1011 + 1.1012 +bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) { 1.1013 + // We don't deal with partial compilation units (the only other tag 1.1014 + // likely to be used for root DIE). 1.1015 + return tag == dwarf2reader::DW_TAG_compile_unit; 1.1016 +} 1.1017 + 1.1018 +} // namespace google_breakpad