// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy

// Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.

// For PRI* macros, before anything else might #include it.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif  /* __STDC_FORMAT_MACROS */

#include "common/dwarf_cu_to_module.h"

#include <assert.h>
#if !defined(__ANDROID__)
#include <cxxabi.h>
#endif
#include <inttypes.h>

#include <algorithm>
#include <iomanip>
#include <set>
#include <utility>

#include "common/dwarf_line_to_module.h"
#include "common/logging.h"

namespace google_breakpad {

using std::map;
using std::pair;
using std::set;
using std::sort;
using std::vector;

// Data provided by a DWARF specification DIE.
//
// In DWARF, the DIE for a definition may contain a DW_AT_specification
// attribute giving the offset of the corresponding declaration DIE, and
// the definition DIE may omit information given in the declaration. For
// example, it's common for a function's address range to appear only in
// its definition DIE, but its name to appear only in its declaration
// DIE.
//
// The dumper needs to be able to follow DW_AT_specification links to
// bring all this information together in a FUNC record. Conveniently,
// DIEs that are the target of such links have a DW_AT_declaration flag
// set, so we can identify them when we first see them, and record their
// contents for later reference.
//
// A Specification holds information gathered from a declaration DIE that
// we may need if we find a DW_AT_specification link pointing to it.
michael@0: struct DwarfCUToModule::Specification { michael@0: // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name. michael@0: string qualified_name; michael@0: michael@0: // The name of the enclosing scope, or the empty string if there is none. michael@0: string enclosing_name; michael@0: michael@0: // The name for the specification DIE itself, without any enclosing michael@0: // name components. michael@0: string unqualified_name; michael@0: }; michael@0: michael@0: // An abstract origin -- base definition of an inline function. michael@0: struct AbstractOrigin { michael@0: AbstractOrigin() : name() {} michael@0: AbstractOrigin(const string& name) : name(name) {} michael@0: michael@0: string name; michael@0: }; michael@0: michael@0: typedef map AbstractOriginByOffset; michael@0: michael@0: // Data global to the DWARF-bearing file that is private to the michael@0: // DWARF-to-Module process. michael@0: struct DwarfCUToModule::FilePrivate { michael@0: // A set of strings used in this CU. Before storing a string in one of michael@0: // our data structures, insert it into this set, and then use the string michael@0: // from the set. michael@0: // michael@0: // In some STL implementations, strings are reference-counted internally, michael@0: // meaning that simply using strings from this set, even if passed by michael@0: // value, assigned, or held directly in structures and containers michael@0: // (map, for example), causes those strings to share a michael@0: // single instance of each distinct piece of text. GNU's libstdc++ uses michael@0: // reference counts, and I believe MSVC did as well, at some point. michael@0: // However, C++ '11 implementations are moving away from reference michael@0: // counting. michael@0: // michael@0: // In other implementations, string assignments copy the string's text, michael@0: // so this set will actually hold yet another copy of the string (although michael@0: // everything will still work). 
To improve memory consumption portably, michael@0: // we will probably need to use pointers to strings held in this set. michael@0: set common_strings; michael@0: michael@0: // A map from offsets of DIEs within the .debug_info section to michael@0: // Specifications describing those DIEs. Specification references can michael@0: // cross compilation unit boundaries. michael@0: SpecificationByOffset specifications; michael@0: michael@0: AbstractOriginByOffset origins; michael@0: }; michael@0: michael@0: DwarfCUToModule::FileContext::FileContext(const string &filename_arg, michael@0: Module *module_arg) michael@0: : filename(filename_arg), module(module_arg) { michael@0: file_private = new FilePrivate(); michael@0: } michael@0: michael@0: DwarfCUToModule::FileContext::~FileContext() { michael@0: delete file_private; michael@0: } michael@0: michael@0: // Information global to the particular compilation unit we're michael@0: // parsing. This is for data shared across the CU's entire DIE tree, michael@0: // and parameters from the code invoking the CU parser. michael@0: struct DwarfCUToModule::CUContext { michael@0: CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg) michael@0: : file_context(file_context_arg), michael@0: reporter(reporter_arg), michael@0: language(Language::CPlusPlus) { } michael@0: ~CUContext() { michael@0: for (vector::iterator it = functions.begin(); michael@0: it != functions.end(); it++) michael@0: delete *it; michael@0: }; michael@0: michael@0: // The DWARF-bearing file into which this CU was incorporated. michael@0: FileContext *file_context; michael@0: michael@0: // For printing error messages. michael@0: WarningReporter *reporter; michael@0: michael@0: // The source language of this compilation unit. michael@0: const Language *language; michael@0: michael@0: // The functions defined in this compilation unit. We accumulate michael@0: // them here during parsing. 
Then, in DwarfCUToModule::Finish, we michael@0: // assign them lines and add them to file_context->module. michael@0: // michael@0: // Destroying this destroys all the functions this vector points to. michael@0: vector functions; michael@0: }; michael@0: michael@0: // Information about the context of a particular DIE. This is for michael@0: // information that changes as we descend the tree towards the leaves: michael@0: // the containing classes/namespaces, etc. michael@0: struct DwarfCUToModule::DIEContext { michael@0: // The fully-qualified name of the context. For example, for a michael@0: // tree like: michael@0: // michael@0: // DW_TAG_namespace Foo michael@0: // DW_TAG_class Bar michael@0: // DW_TAG_subprogram Baz michael@0: // michael@0: // in a C++ compilation unit, the DIEContext's name for the michael@0: // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's michael@0: // name for the DW_TAG_namespace DIE would be "". michael@0: string name; michael@0: }; michael@0: michael@0: // An abstract base class for all the dumper's DIE handlers. michael@0: class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler { michael@0: public: michael@0: // Create a handler for the DIE at OFFSET whose compilation unit is michael@0: // described by CU_CONTEXT, and whose immediate context is described michael@0: // by PARENT_CONTEXT. michael@0: GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context, michael@0: uint64 offset) michael@0: : cu_context_(cu_context), michael@0: parent_context_(parent_context), michael@0: offset_(offset), michael@0: declaration_(false), michael@0: specification_(NULL) { } michael@0: michael@0: // Derived classes' ProcessAttributeUnsigned can defer to this to michael@0: // handle DW_AT_declaration, or simply not override it. 
michael@0: void ProcessAttributeUnsigned(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data); michael@0: michael@0: // Derived classes' ProcessAttributeReference can defer to this to michael@0: // handle DW_AT_specification, or simply not override it. michael@0: void ProcessAttributeReference(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data); michael@0: michael@0: // Derived classes' ProcessAttributeReference can defer to this to michael@0: // handle DW_AT_specification, or simply not override it. michael@0: void ProcessAttributeString(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: const string &data); michael@0: michael@0: protected: michael@0: // Compute and return the fully-qualified name of the DIE. If this michael@0: // DIE is a declaration DIE, to be cited by other DIEs' michael@0: // DW_AT_specification attributes, record its enclosing name and michael@0: // unqualified name in the specification table. michael@0: // michael@0: // Use this from EndAttributes member functions, not ProcessAttribute* michael@0: // functions; only the former can be sure that all the DIE's attributes michael@0: // have been seen. michael@0: string ComputeQualifiedName(); michael@0: michael@0: CUContext *cu_context_; michael@0: DIEContext *parent_context_; michael@0: uint64 offset_; michael@0: michael@0: // Place the name in the global set of strings. Even though this looks michael@0: // like a copy, all the major std::string implementations use reference michael@0: // counting internally, so the effect is to have all the data structures michael@0: // share copies of strings whenever possible. michael@0: // FIXME: Should this return something like a string_ref to avoid the michael@0: // assumption about how strings are implemented? michael@0: string AddStringToPool(const string &str); michael@0: michael@0: // If this DIE has a DW_AT_declaration attribute, this is its value. 
michael@0: // It is false on DIEs with no DW_AT_declaration attribute. michael@0: bool declaration_; michael@0: michael@0: // If this DIE has a DW_AT_specification attribute, this is the michael@0: // Specification structure for the DIE the attribute refers to. michael@0: // Otherwise, this is NULL. michael@0: Specification *specification_; michael@0: michael@0: // The value of the DW_AT_name attribute, or the empty string if the michael@0: // DIE has no such attribute. michael@0: string name_attribute_; michael@0: michael@0: // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty michael@0: // string if the DIE has no such attribute or its content could not be michael@0: // demangled. michael@0: string demangled_name_; michael@0: }; michael@0: michael@0: void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned( michael@0: enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data) { michael@0: switch (attr) { michael@0: case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break; michael@0: default: break; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference( michael@0: enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data) { michael@0: switch (attr) { michael@0: case dwarf2reader::DW_AT_specification: { michael@0: // Find the Specification to which this attribute refers, and michael@0: // set specification_ appropriately. We could do more processing michael@0: // here, but it's better to leave the real work to our michael@0: // EndAttribute member function, at which point we know we have michael@0: // seen all the DIE's attributes. 
michael@0: FileContext *file_context = cu_context_->file_context; michael@0: SpecificationByOffset *specifications michael@0: = &file_context->file_private->specifications; michael@0: SpecificationByOffset::iterator spec = specifications->find(data); michael@0: if (spec != specifications->end()) { michael@0: specification_ = &spec->second; michael@0: } else { michael@0: // Technically, there's no reason a DW_AT_specification michael@0: // couldn't be a forward reference, but supporting that would michael@0: // be a lot of work (changing to a two-pass structure), and I michael@0: // don't think any producers we care about ever emit such michael@0: // things. michael@0: cu_context_->reporter->UnknownSpecification(offset_, data); michael@0: } michael@0: break; michael@0: } michael@0: default: break; michael@0: } michael@0: } michael@0: michael@0: string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) { michael@0: pair::iterator, bool> result = michael@0: cu_context_->file_context->file_private->common_strings.insert(str); michael@0: return *result.first; michael@0: } michael@0: michael@0: void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString( michael@0: enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: const string &data) { michael@0: switch (attr) { michael@0: case dwarf2reader::DW_AT_name: michael@0: name_attribute_ = AddStringToPool(data); michael@0: break; michael@0: case dwarf2reader::DW_AT_MIPS_linkage_name: { michael@0: char* demangled = NULL; michael@0: #if !defined(__ANDROID__) michael@0: demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, NULL); michael@0: #endif michael@0: if (demangled) { michael@0: demangled_name_ = AddStringToPool(demangled); michael@0: free(reinterpret_cast(demangled)); michael@0: } michael@0: break; michael@0: } michael@0: default: break; michael@0: } michael@0: } michael@0: michael@0: string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() { michael@0: // Use the 
demangled name, if one is available. Demangled names are michael@0: // preferable to those inferred from the DWARF structure because they michael@0: // include argument types. michael@0: const string *qualified_name = NULL; michael@0: if (!demangled_name_.empty()) { michael@0: // Found it is this DIE. michael@0: qualified_name = &demangled_name_; michael@0: } else if (specification_ && !specification_->qualified_name.empty()) { michael@0: // Found it on the specification. michael@0: qualified_name = &specification_->qualified_name; michael@0: } michael@0: michael@0: const string *unqualified_name; michael@0: const string *enclosing_name; michael@0: if (!qualified_name) { michael@0: // Find our unqualified name. If the DIE has its own DW_AT_name michael@0: // attribute, then use that; otherwise, check our specification. michael@0: if (name_attribute_.empty() && specification_) michael@0: unqualified_name = &specification_->unqualified_name; michael@0: else michael@0: unqualified_name = &name_attribute_; michael@0: michael@0: // Find the name of our enclosing context. If we have a michael@0: // specification, it's the specification's enclosing context that michael@0: // counts; otherwise, use this DIE's context. michael@0: if (specification_) michael@0: enclosing_name = &specification_->enclosing_name; michael@0: else michael@0: enclosing_name = &parent_context_->name; michael@0: } michael@0: michael@0: // If this DIE was marked as a declaration, record its names in the michael@0: // specification table. 
michael@0: if (declaration_) { michael@0: FileContext *file_context = cu_context_->file_context; michael@0: Specification spec; michael@0: if (qualified_name) michael@0: spec.qualified_name = *qualified_name; michael@0: else { michael@0: spec.enclosing_name = *enclosing_name; michael@0: spec.unqualified_name = *unqualified_name; michael@0: } michael@0: file_context->file_private->specifications[offset_] = spec; michael@0: } michael@0: michael@0: if (qualified_name) michael@0: return *qualified_name; michael@0: michael@0: // Combine the enclosing name and unqualified name to produce our michael@0: // own fully-qualified name. michael@0: return cu_context_->language->MakeQualifiedName(*enclosing_name, michael@0: *unqualified_name); michael@0: } michael@0: michael@0: // A handler class for DW_TAG_subprogram DIEs. michael@0: class DwarfCUToModule::FuncHandler: public GenericDIEHandler { michael@0: public: michael@0: FuncHandler(CUContext *cu_context, DIEContext *parent_context, michael@0: uint64 offset) michael@0: : GenericDIEHandler(cu_context, parent_context, offset), michael@0: low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr), michael@0: abstract_origin_(NULL), inline_(false) { } michael@0: void ProcessAttributeUnsigned(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data); michael@0: void ProcessAttributeSigned(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: int64 data); michael@0: void ProcessAttributeReference(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data); michael@0: michael@0: bool EndAttributes(); michael@0: void Finish(); michael@0: michael@0: private: michael@0: // The fully-qualified name, as derived from name_attribute_, michael@0: // specification_, parent_context_. Computed in EndAttributes. 
michael@0: string name_; michael@0: uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc michael@0: DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address. michael@0: const AbstractOrigin* abstract_origin_; michael@0: bool inline_; michael@0: }; michael@0: michael@0: void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned( michael@0: enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data) { michael@0: switch (attr) { michael@0: // If this attribute is present at all --- even if its value is michael@0: // DW_INL_not_inlined --- then GCC may cite it as someone else's michael@0: // DW_AT_abstract_origin attribute. michael@0: case dwarf2reader::DW_AT_inline: inline_ = true; break; michael@0: michael@0: case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break; michael@0: case dwarf2reader::DW_AT_high_pc: michael@0: high_pc_form_ = form; michael@0: high_pc_ = data; michael@0: break; michael@0: michael@0: default: michael@0: GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::FuncHandler::ProcessAttributeSigned( michael@0: enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: int64 data) { michael@0: switch (attr) { michael@0: // If this attribute is present at all --- even if its value is michael@0: // DW_INL_not_inlined --- then GCC may cite it as someone else's michael@0: // DW_AT_abstract_origin attribute. 
michael@0: case dwarf2reader::DW_AT_inline: inline_ = true; break; michael@0: michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::FuncHandler::ProcessAttributeReference( michael@0: enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data) { michael@0: switch(attr) { michael@0: case dwarf2reader::DW_AT_abstract_origin: { michael@0: const AbstractOriginByOffset& origins = michael@0: cu_context_->file_context->file_private->origins; michael@0: AbstractOriginByOffset::const_iterator origin = origins.find(data); michael@0: if (origin != origins.end()) { michael@0: abstract_origin_ = &(origin->second); michael@0: } else { michael@0: cu_context_->reporter->UnknownAbstractOrigin(offset_, data); michael@0: } michael@0: break; michael@0: } michael@0: default: michael@0: GenericDIEHandler::ProcessAttributeReference(attr, form, data); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: bool DwarfCUToModule::FuncHandler::EndAttributes() { michael@0: // Compute our name, and record a specification, if appropriate. michael@0: name_ = ComputeQualifiedName(); michael@0: if (name_.empty() && abstract_origin_) { michael@0: name_ = abstract_origin_->name; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: void DwarfCUToModule::FuncHandler::Finish() { michael@0: // Make high_pc_ an address, if it isn't already. michael@0: if (high_pc_form_ != dwarf2reader::DW_FORM_addr) { michael@0: high_pc_ += low_pc_; michael@0: } michael@0: michael@0: // Did we collect the information we need? Not all DWARF function michael@0: // entries have low and high addresses (for example, inlined michael@0: // functions that were never used), but all the ones we're michael@0: // interested in cover a non-empty range of bytes. michael@0: if (low_pc_ < high_pc_) { michael@0: // Create a Module::Function based on the data we've gathered, and michael@0: // add it to the functions_ list. 
michael@0: Module::Function *func = new Module::Function; michael@0: // Malformed DWARF may omit the name, but all Module::Functions must michael@0: // have names. michael@0: if (!name_.empty()) { michael@0: func->name = name_; michael@0: } else { michael@0: cu_context_->reporter->UnnamedFunction(offset_); michael@0: func->name = ""; michael@0: } michael@0: func->address = low_pc_; michael@0: func->size = high_pc_ - low_pc_; michael@0: func->parameter_size = 0; michael@0: if (func->address) { michael@0: // If the function address is zero this is a sign that this function michael@0: // description is just empty debug data and should just be discarded. michael@0: cu_context_->functions.push_back(func); michael@0: } michael@0: } else if (inline_) { michael@0: AbstractOrigin origin(name_); michael@0: cu_context_->file_context->file_private->origins[offset_] = origin; michael@0: } michael@0: } michael@0: michael@0: // A handler for DIEs that contain functions and contribute a michael@0: // component to their names: namespaces, classes, etc. michael@0: class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { michael@0: public: michael@0: NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context, michael@0: uint64 offset) michael@0: : GenericDIEHandler(cu_context, parent_context, offset) { } michael@0: bool EndAttributes(); michael@0: DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag); michael@0: michael@0: private: michael@0: DIEContext child_context_; // A context for our children. 
michael@0: }; michael@0: michael@0: bool DwarfCUToModule::NamedScopeHandler::EndAttributes() { michael@0: child_context_.name = ComputeQualifiedName(); michael@0: return true; michael@0: } michael@0: michael@0: dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler( michael@0: uint64 offset, michael@0: enum DwarfTag tag) { michael@0: switch (tag) { michael@0: case dwarf2reader::DW_TAG_subprogram: michael@0: return new FuncHandler(cu_context_, &child_context_, offset); michael@0: case dwarf2reader::DW_TAG_namespace: michael@0: case dwarf2reader::DW_TAG_class_type: michael@0: case dwarf2reader::DW_TAG_structure_type: michael@0: case dwarf2reader::DW_TAG_union_type: michael@0: return new NamedScopeHandler(cu_context_, &child_context_, offset); michael@0: default: michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::CUHeading() { michael@0: if (printed_cu_header_) michael@0: return; michael@0: BPLOG(INFO) michael@0: << filename_ << ": in compilation unit '" << cu_name_ michael@0: << "' (offset 0x" << std::setbase(16) << cu_offset_ << std::setbase(10) michael@0: << "):"; michael@0: printed_cu_header_ = true; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset, michael@0: uint64 target) { michael@0: CUHeading(); michael@0: BPLOG(INFO) michael@0: << filename_ << ": the DIE at offset 0x" michael@0: << std::setbase(16) << offset << std::setbase(10) michael@0: << " has a DW_AT_specification attribute referring to the die at offset 0x" michael@0: << std::setbase(16) << target << std::setbase(10) michael@0: << ", which either was not marked as a declaration, or comes " michael@0: << "later in the file"; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset, michael@0: uint64 target) { michael@0: CUHeading(); michael@0: BPLOG(INFO) michael@0: << filename_ << ": the DIE at offset 0x" 
michael@0: << std::setbase(16) << offset << std::setbase(10) michael@0: << " has a DW_AT_abstract_origin attribute referring to the die at" michael@0: << " offset 0x" << std::setbase(16) << target << std::setbase(10) michael@0: << ", which either was not marked as an inline, or comes " michael@0: << "later in the file"; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::MissingSection(const string &name) { michael@0: CUHeading(); michael@0: BPLOG(INFO) << filename_ << ": warning: couldn't find DWARF '" michael@0: << name << "' section"; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) { michael@0: CUHeading(); michael@0: BPLOG(INFO) << filename_ << ": warning: line number data offset beyond " michael@0: << "end of '.debug_line' section"; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::UncoveredHeading() { michael@0: if (printed_unpaired_header_) michael@0: return; michael@0: CUHeading(); michael@0: BPLOG(INFO) << filename_ << ": warning: skipping unpaired lines/functions:"; michael@0: printed_unpaired_header_ = true; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::UncoveredFunction( michael@0: const Module::Function &function) { michael@0: if (!uncovered_warnings_enabled_) michael@0: return; michael@0: UncoveredHeading(); michael@0: BPLOG(INFO) << " function" << (function.size == 0 ? " (zero-length)" : "") michael@0: << ": " << function.name; michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) { michael@0: if (!uncovered_warnings_enabled_) michael@0: return; michael@0: UncoveredHeading(); michael@0: BPLOG(INFO) << " line" << (line.size == 0 ? 
" (zero-length)" : "") michael@0: << ": " << line.file->name << ":" << line.number michael@0: << " at 0x" << std::setbase(16) << line.address << std::setbase(10); michael@0: } michael@0: michael@0: void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) { michael@0: CUHeading(); michael@0: BPLOG(INFO) << filename_ << ": warning: function at offset 0x" michael@0: << std::setbase(16) << offset << std::setbase(10) << " has no name"; michael@0: } michael@0: michael@0: DwarfCUToModule::DwarfCUToModule(FileContext *file_context, michael@0: LineToModuleHandler *line_reader, michael@0: WarningReporter *reporter) michael@0: : line_reader_(line_reader), has_source_line_info_(false) { michael@0: cu_context_ = new CUContext(file_context, reporter); michael@0: child_context_ = new DIEContext(); michael@0: } michael@0: michael@0: DwarfCUToModule::~DwarfCUToModule() { michael@0: delete cu_context_; michael@0: delete child_context_; michael@0: } michael@0: michael@0: void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: int64 data) { michael@0: switch (attr) { michael@0: case dwarf2reader::DW_AT_language: // source language of this CU michael@0: SetLanguage(static_cast(data)); michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: uint64 data) { michael@0: switch (attr) { michael@0: case dwarf2reader::DW_AT_stmt_list: // Line number information. 
michael@0: has_source_line_info_ = true; michael@0: source_line_offset_ = data; michael@0: break; michael@0: case dwarf2reader::DW_AT_language: // source language of this CU michael@0: SetLanguage(static_cast(data)); michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr, michael@0: enum DwarfForm form, michael@0: const string &data) { michael@0: switch (attr) { michael@0: case dwarf2reader::DW_AT_name: michael@0: cu_context_->reporter->SetCUName(data); michael@0: break; michael@0: case dwarf2reader::DW_AT_comp_dir: michael@0: line_reader_->StartCompilationUnit(data); michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: bool DwarfCUToModule::EndAttributes() { michael@0: return true; michael@0: } michael@0: michael@0: dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler( michael@0: uint64 offset, michael@0: enum DwarfTag tag) { michael@0: switch (tag) { michael@0: case dwarf2reader::DW_TAG_subprogram: michael@0: return new FuncHandler(cu_context_, child_context_, offset); michael@0: case dwarf2reader::DW_TAG_namespace: michael@0: case dwarf2reader::DW_TAG_class_type: michael@0: case dwarf2reader::DW_TAG_structure_type: michael@0: case dwarf2reader::DW_TAG_union_type: michael@0: return new NamedScopeHandler(cu_context_, child_context_, offset); michael@0: default: michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::SetLanguage(DwarfLanguage language) { michael@0: switch (language) { michael@0: case dwarf2reader::DW_LANG_Java: michael@0: cu_context_->language = Language::Java; michael@0: break; michael@0: michael@0: // DWARF has no generic language code for assembly language; this is michael@0: // what the GNU toolchain uses. 
michael@0: case dwarf2reader::DW_LANG_Mips_Assembler: michael@0: cu_context_->language = Language::Assembler; michael@0: break; michael@0: michael@0: // C++ covers so many cases that it probably has some way to cope michael@0: // with whatever the other languages throw at us. So make it the michael@0: // default. michael@0: // michael@0: // Objective C and Objective C++ seem to create entries for michael@0: // methods whose DW_AT_name values are already fully-qualified: michael@0: // "-[Classname method:]". These appear at the top level. michael@0: // michael@0: // DWARF data for C should never include namespaces or functions michael@0: // nested in struct types, but if it ever does, then C++'s michael@0: // notation is probably not a bad choice for that. michael@0: default: michael@0: case dwarf2reader::DW_LANG_ObjC: michael@0: case dwarf2reader::DW_LANG_ObjC_plus_plus: michael@0: case dwarf2reader::DW_LANG_C: michael@0: case dwarf2reader::DW_LANG_C89: michael@0: case dwarf2reader::DW_LANG_C99: michael@0: case dwarf2reader::DW_LANG_C_plus_plus: michael@0: cu_context_->language = Language::CPlusPlus; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::ReadSourceLines(uint64 offset) { michael@0: const dwarf2reader::SectionMap §ion_map michael@0: = cu_context_->file_context->section_map; michael@0: dwarf2reader::SectionMap::const_iterator map_entry michael@0: = section_map.find(".debug_line"); michael@0: // Mac OS X puts DWARF data in sections whose names begin with "__" michael@0: // instead of ".". 
michael@0: if (map_entry == section_map.end()) michael@0: map_entry = section_map.find("__debug_line"); michael@0: if (map_entry == section_map.end()) { michael@0: cu_context_->reporter->MissingSection(".debug_line"); michael@0: return; michael@0: } michael@0: const char *section_start = map_entry->second.first; michael@0: uint64 section_length = map_entry->second.second; michael@0: if (offset >= section_length) { michael@0: cu_context_->reporter->BadLineInfoOffset(offset); michael@0: return; michael@0: } michael@0: line_reader_->ReadProgram(section_start + offset, section_length - offset, michael@0: cu_context_->file_context->module, &lines_); michael@0: } michael@0: michael@0: namespace { michael@0: // Return true if ADDRESS falls within the range of ITEM. michael@0: template michael@0: inline bool within(const T &item, Module::Address address) { michael@0: // Because Module::Address is unsigned, and unsigned arithmetic michael@0: // wraps around, this will be false if ADDRESS falls before the michael@0: // start of ITEM, or if it falls after ITEM's end. michael@0: return address - item.address < item.size; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::AssignLinesToFunctions() { michael@0: vector *functions = &cu_context_->functions; michael@0: WarningReporter *reporter = cu_context_->reporter; michael@0: michael@0: // This would be simpler if we assumed that source line entries michael@0: // don't cross function boundaries. However, there's no real reason michael@0: // to assume that (say) a series of function definitions on the same michael@0: // line wouldn't get coalesced into one line number entry. The michael@0: // DWARF spec certainly makes no such promises. michael@0: // michael@0: // So treat the functions and lines as peers, and take the trouble michael@0: // to compute their ranges' intersections precisely. 
In any case, michael@0: // the hair here is a constant factor for performance; the michael@0: // complexity from here on out is linear. michael@0: michael@0: // Put both our functions and lines in order by address. michael@0: std::sort(functions->begin(), functions->end(), michael@0: Module::Function::CompareByAddress); michael@0: std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress); michael@0: michael@0: // The last line that we used any piece of. We use this only for michael@0: // generating warnings. michael@0: const Module::Line *last_line_used = NULL; michael@0: michael@0: // The last function and line we warned about --- so we can avoid michael@0: // doing so more than once. michael@0: const Module::Function *last_function_cited = NULL; michael@0: const Module::Line *last_line_cited = NULL; michael@0: michael@0: // Make a single pass through both vectors from lower to higher michael@0: // addresses, populating each Function's lines vector with lines michael@0: // from our lines_ vector that fall within the function's address michael@0: // range. michael@0: vector::iterator func_it = functions->begin(); michael@0: vector::const_iterator line_it = lines_.begin(); michael@0: michael@0: Module::Address current; michael@0: michael@0: // Pointers to the referents of func_it and line_it, or NULL if the michael@0: // iterator is at the end of the sequence. michael@0: Module::Function *func; michael@0: const Module::Line *line; michael@0: michael@0: // Start current at the beginning of the first line or function, michael@0: // whichever is earlier. 
michael@0: if (func_it != functions->end() && line_it != lines_.end()) { michael@0: func = *func_it; michael@0: line = &*line_it; michael@0: current = std::min(func->address, line->address); michael@0: } else if (line_it != lines_.end()) { michael@0: func = NULL; michael@0: line = &*line_it; michael@0: current = line->address; michael@0: } else if (func_it != functions->end()) { michael@0: func = *func_it; michael@0: line = NULL; michael@0: current = (*func_it)->address; michael@0: } else { michael@0: return; michael@0: } michael@0: michael@0: while (func || line) { michael@0: // This loop has two invariants that hold at the top. michael@0: // michael@0: // First, at least one of the iterators is not at the end of its michael@0: // sequence, and those that are not refer to the earliest michael@0: // function or line that contains or starts after CURRENT. michael@0: // michael@0: // Note that every byte is in one of four states: it is covered michael@0: // or not covered by a function, and, independently, it is michael@0: // covered or not covered by a line. michael@0: // michael@0: // The second invariant is that CURRENT refers to a byte whose michael@0: // state is different from its predecessor, or it refers to the michael@0: // first byte in the address space. In other words, CURRENT is michael@0: // always the address of a transition. michael@0: // michael@0: // Note that, although each iteration advances CURRENT from one michael@0: // transition address to the next in each iteration, it might michael@0: // not advance the iterators. Suppose we have a function that michael@0: // starts with a line, has a gap, and then a second line, and michael@0: // suppose that we enter an iteration with CURRENT at the end of michael@0: // the first line. The next transition address is the start of michael@0: // the second line, after the gap, so the iteration should michael@0: // advance CURRENT to that point. 
At the head of that iteration, michael@0: // the invariants require that the line iterator be pointing at michael@0: // the second line. But this is also true at the head of the michael@0: // next. And clearly, the iteration must not change the function michael@0: // iterator. So neither iterator moves. michael@0: michael@0: // Assert the first invariant (see above). michael@0: assert(!func || current < func->address || within(*func, current)); michael@0: assert(!line || current < line->address || within(*line, current)); michael@0: michael@0: // The next transition after CURRENT. michael@0: Module::Address next_transition; michael@0: michael@0: // Figure out which state we're in, add lines or warn, and compute michael@0: // the next transition address. michael@0: if (func && current >= func->address) { michael@0: if (line && current >= line->address) { michael@0: // Covered by both a line and a function. michael@0: Module::Address func_left = func->size - (current - func->address); michael@0: Module::Address line_left = line->size - (current - line->address); michael@0: // This may overflow, but things work out. michael@0: next_transition = current + std::min(func_left, line_left); michael@0: Module::Line l = *line; michael@0: l.address = current; michael@0: l.size = next_transition - current; michael@0: func->lines.push_back(l); michael@0: last_line_used = line; michael@0: } else { michael@0: // Covered by a function, but no line. michael@0: if (func != last_function_cited) { michael@0: reporter->UncoveredFunction(*func); michael@0: last_function_cited = func; michael@0: } michael@0: if (line && within(*func, line->address)) michael@0: next_transition = line->address; michael@0: else michael@0: // If this overflows, we'll catch it below. michael@0: next_transition = func->address + func->size; michael@0: } michael@0: } else { michael@0: if (line && current >= line->address) { michael@0: // Covered by a line, but no function. 
michael@0: // michael@0: // If GCC emits padding after one function to align the start michael@0: // of the next, then it will attribute the padding michael@0: // instructions to the last source line of function (to reduce michael@0: // the size of the line number info), but omit it from the michael@0: // DW_AT_{low,high}_pc range given in .debug_info (since it michael@0: // costs nothing to be precise there). If we did use at least michael@0: // some of the line we're about to skip, and it ends at the michael@0: // start of the next function, then assume this is what michael@0: // happened, and don't warn. michael@0: if (line != last_line_cited michael@0: && !(func michael@0: && line == last_line_used michael@0: && func->address - line->address == line->size)) { michael@0: reporter->UncoveredLine(*line); michael@0: last_line_cited = line; michael@0: } michael@0: if (func && within(*line, func->address)) michael@0: next_transition = func->address; michael@0: else michael@0: // If this overflows, we'll catch it below. michael@0: next_transition = line->address + line->size; michael@0: } else { michael@0: // Covered by neither a function nor a line. By the invariant, michael@0: // both func and line begin after CURRENT. The next transition michael@0: // is the start of the next function or next line, whichever michael@0: // is earliest. michael@0: assert (func || line); michael@0: if (func && line) michael@0: next_transition = std::min(func->address, line->address); michael@0: else if (func) michael@0: next_transition = func->address; michael@0: else michael@0: next_transition = line->address; michael@0: } michael@0: } michael@0: michael@0: // If a function or line abuts the end of the address space, then michael@0: // next_transition may end up being zero, in which case we've completed michael@0: // our pass. Handle that here, instead of trying to deal with it in michael@0: // each place we compute next_transition. 
michael@0: if (!next_transition) michael@0: break; michael@0: michael@0: // Advance iterators as needed. If lines overlap or functions overlap, michael@0: // then we could go around more than once. We don't worry too much michael@0: // about what result we produce in that case, just as long as we don't michael@0: // hang or crash. michael@0: while (func_it != functions->end() michael@0: && next_transition >= (*func_it)->address michael@0: && !within(**func_it, next_transition)) michael@0: func_it++; michael@0: func = (func_it != functions->end()) ? *func_it : NULL; michael@0: while (line_it != lines_.end() michael@0: && next_transition >= line_it->address michael@0: && !within(*line_it, next_transition)) michael@0: line_it++; michael@0: line = (line_it != lines_.end()) ? &*line_it : NULL; michael@0: michael@0: // We must make progress. michael@0: assert(next_transition > current); michael@0: current = next_transition; michael@0: } michael@0: } michael@0: michael@0: void DwarfCUToModule::Finish() { michael@0: // Assembly language files have no function data, and that gives us michael@0: // no place to store our line numbers (even though the GNU toolchain michael@0: // will happily produce source line info for assembly language michael@0: // files). To avoid spurious warnings about lines we can't assign michael@0: // to functions, skip CUs in languages that lack functions. michael@0: if (!cu_context_->language->HasFunctions()) michael@0: return; michael@0: michael@0: // Read source line info, if we have any. michael@0: if (has_source_line_info_) michael@0: ReadSourceLines(source_line_offset_); michael@0: michael@0: vector *functions = &cu_context_->functions; michael@0: michael@0: // Dole out lines to the appropriate functions. michael@0: AssignLinesToFunctions(); michael@0: michael@0: // Add our functions, which now have source lines assigned to them, michael@0: // to module_. 
michael@0: cu_context_->file_context->module->AddFunctions(functions->begin(), michael@0: functions->end()); michael@0: michael@0: // Ownership of the function objects has shifted from cu_context to michael@0: // the Module. michael@0: functions->clear(); michael@0: } michael@0: michael@0: bool DwarfCUToModule::StartCompilationUnit(uint64 offset, michael@0: uint8 address_size, michael@0: uint8 offset_size, michael@0: uint64 cu_length, michael@0: uint8 dwarf_version) { michael@0: return dwarf_version >= 2; michael@0: } michael@0: michael@0: bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) { michael@0: // We don't deal with partial compilation units (the only other tag michael@0: // likely to be used for root DIE). michael@0: return tag == dwarf2reader::DW_TAG_compile_unit; michael@0: } michael@0: michael@0: } // namespace google_breakpad