toolkit/crashreporter/google-breakpad/src/common/dwarf_cu_to_module.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 // Copyright (c) 2010 Google Inc.
     2 // All rights reserved.
     3 //
     4 // Redistribution and use in source and binary forms, with or without
     5 // modification, are permitted provided that the following conditions are
     6 // met:
     7 //
     8 //     * Redistributions of source code must retain the above copyright
     9 // notice, this list of conditions and the following disclaimer.
    10 //     * Redistributions in binary form must reproduce the above
    11 // copyright notice, this list of conditions and the following disclaimer
    12 // in the documentation and/or other materials provided with the
    13 // distribution.
    14 //     * Neither the name of Google Inc. nor the names of its
    15 // contributors may be used to endorse or promote products derived from
    16 // this software without specific prior written permission.
    17 //
    18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
    32 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
    34 // For <inttypes.h> PRI* macros, before anything else might #include it.
    35 #ifndef __STDC_FORMAT_MACROS
    36 #define __STDC_FORMAT_MACROS
    37 #endif  /* __STDC_FORMAT_MACROS */
    39 #include "common/dwarf_cu_to_module.h"
    41 #include <assert.h>
    42 #if !defined(__ANDROID__)
    43 #include <cxxabi.h>
    44 #endif
    45 #include <inttypes.h>
    47 #include <algorithm>
    48 #include <set>
    49 #include <utility>
    50 #include <iomanip>
    52 #include "common/dwarf_line_to_module.h"
    53 #include "common/logging.h"
    55 namespace google_breakpad {
    57 using std::map;
    58 using std::pair;
    59 using std::set;
    60 using std::sort;
    61 using std::vector;
    63 // Data provided by a DWARF specification DIE.
    64 //
    65 // In DWARF, the DIE for a definition may contain a DW_AT_specification
    66 // attribute giving the offset of the corresponding declaration DIE, and
    67 // the definition DIE may omit information given in the declaration. For
    68 // example, it's common for a function's address range to appear only in
    69 // its definition DIE, but its name to appear only in its declaration
    70 // DIE.
    71 //
    72 // The dumper needs to be able to follow DW_AT_specification links to
    73 // bring all this information together in a FUNC record. Conveniently,
    74 // DIEs that are the target of such links have a DW_AT_declaration flag
    75 // set, so we can identify them when we first see them, and record their
    76 // contents for later reference.
    77 //
    78 // A Specification holds information gathered from a declaration DIE that
    79 // we may need if we find a DW_AT_specification link pointing to it.
    80 struct DwarfCUToModule::Specification {
    81   // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
    82   string qualified_name;
    84   // The name of the enclosing scope, or the empty string if there is none.
    85   string enclosing_name;
    87   // The name for the specification DIE itself, without any enclosing
    88   // name components.
    89   string unqualified_name;
    90 };
    92 // An abstract origin -- base definition of an inline function.
    93 struct AbstractOrigin {
    94   AbstractOrigin() : name() {}
    95   AbstractOrigin(const string& name) : name(name) {}
    97   string name;
    98 };
   100 typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
   102 // Data global to the DWARF-bearing file that is private to the
   103 // DWARF-to-Module process.
   104 struct DwarfCUToModule::FilePrivate {
   105   // A set of strings used in this CU. Before storing a string in one of
   106   // our data structures, insert it into this set, and then use the string
   107   // from the set.
   108   //
   109   // In some STL implementations, strings are reference-counted internally,
   110   // meaning that simply using strings from this set, even if passed by
   111   // value, assigned, or held directly in structures and containers
   112   // (map<string, ...>, for example), causes those strings to share a
   113   // single instance of each distinct piece of text. GNU's libstdc++ uses
   114   // reference counts, and I believe MSVC did as well, at some point.
   115   // However, C++ '11 implementations are moving away from reference
   116   // counting.
   117   //
   118   // In other implementations, string assignments copy the string's text,
   119   // so this set will actually hold yet another copy of the string (although
   120   // everything will still work). To improve memory consumption portably,
   121   // we will probably need to use pointers to strings held in this set.
   122   set<string> common_strings;
   124   // A map from offsets of DIEs within the .debug_info section to
   125   // Specifications describing those DIEs. Specification references can
   126   // cross compilation unit boundaries.
   127   SpecificationByOffset specifications;
   129   AbstractOriginByOffset origins;
   130 };
   132 DwarfCUToModule::FileContext::FileContext(const string &filename_arg,
   133                                           Module *module_arg)
   134     : filename(filename_arg), module(module_arg) {
   135   file_private = new FilePrivate();
   136 }
   138 DwarfCUToModule::FileContext::~FileContext() {
   139   delete file_private;
   140 }
   142 // Information global to the particular compilation unit we're
   143 // parsing. This is for data shared across the CU's entire DIE tree,
   144 // and parameters from the code invoking the CU parser.
   145 struct DwarfCUToModule::CUContext {
   146   CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg)
   147       : file_context(file_context_arg),
   148         reporter(reporter_arg),
   149         language(Language::CPlusPlus) { }
   150   ~CUContext() {
   151     for (vector<Module::Function *>::iterator it = functions.begin();
   152          it != functions.end(); it++)
   153       delete *it;
   154   };
   156   // The DWARF-bearing file into which this CU was incorporated.
   157   FileContext *file_context;
   159   // For printing error messages.
   160   WarningReporter *reporter;
   162   // The source language of this compilation unit.
   163   const Language *language;
   165   // The functions defined in this compilation unit. We accumulate
   166   // them here during parsing. Then, in DwarfCUToModule::Finish, we
   167   // assign them lines and add them to file_context->module.
   168   //
   169   // Destroying this destroys all the functions this vector points to.
   170   vector<Module::Function *> functions;
   171 };
   173 // Information about the context of a particular DIE. This is for
   174 // information that changes as we descend the tree towards the leaves:
   175 // the containing classes/namespaces, etc.
   176 struct DwarfCUToModule::DIEContext {
   177   // The fully-qualified name of the context. For example, for a
   178   // tree like:
   179   //
   180   // DW_TAG_namespace Foo
   181   //   DW_TAG_class Bar
   182   //     DW_TAG_subprogram Baz
   183   //
   184   // in a C++ compilation unit, the DIEContext's name for the
   185   // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
   186   // name for the DW_TAG_namespace DIE would be "".
   187   string name;
   188 };
   190 // An abstract base class for all the dumper's DIE handlers.
   191 class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler {
   192  public:
   193   // Create a handler for the DIE at OFFSET whose compilation unit is
   194   // described by CU_CONTEXT, and whose immediate context is described
   195   // by PARENT_CONTEXT.
   196   GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context,
   197                     uint64 offset)
   198       : cu_context_(cu_context),
   199         parent_context_(parent_context),
   200         offset_(offset),
   201         declaration_(false),
   202         specification_(NULL) { }
   204   // Derived classes' ProcessAttributeUnsigned can defer to this to
   205   // handle DW_AT_declaration, or simply not override it.
   206   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
   207                                 enum DwarfForm form,
   208                                 uint64 data);
   210   // Derived classes' ProcessAttributeReference can defer to this to
   211   // handle DW_AT_specification, or simply not override it.
   212   void ProcessAttributeReference(enum DwarfAttribute attr,
   213                                  enum DwarfForm form,
   214                                  uint64 data);
   216   // Derived classes' ProcessAttributeReference can defer to this to
   217   // handle DW_AT_specification, or simply not override it.
   218   void ProcessAttributeString(enum DwarfAttribute attr,
   219                               enum DwarfForm form,
   220                               const string &data);
   222  protected:
   223   // Compute and return the fully-qualified name of the DIE. If this
   224   // DIE is a declaration DIE, to be cited by other DIEs'
   225   // DW_AT_specification attributes, record its enclosing name and
   226   // unqualified name in the specification table.
   227   //
   228   // Use this from EndAttributes member functions, not ProcessAttribute*
   229   // functions; only the former can be sure that all the DIE's attributes
   230   // have been seen.
   231   string ComputeQualifiedName();
   233   CUContext *cu_context_;
   234   DIEContext *parent_context_;
   235   uint64 offset_;
   237   // Place the name in the global set of strings. Even though this looks
   238   // like a copy, all the major std::string implementations use reference
   239   // counting internally, so the effect is to have all the data structures
   240   // share copies of strings whenever possible.
   241   // FIXME: Should this return something like a string_ref to avoid the
   242   // assumption about how strings are implemented?
   243   string AddStringToPool(const string &str);
   245   // If this DIE has a DW_AT_declaration attribute, this is its value.
   246   // It is false on DIEs with no DW_AT_declaration attribute.
   247   bool declaration_;
   249   // If this DIE has a DW_AT_specification attribute, this is the
   250   // Specification structure for the DIE the attribute refers to.
   251   // Otherwise, this is NULL.
   252   Specification *specification_;
   254   // The value of the DW_AT_name attribute, or the empty string if the
   255   // DIE has no such attribute.
   256   string name_attribute_;
   258   // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
   259   // string if the DIE has no such attribute or its content could not be
   260   // demangled.
   261   string demangled_name_;
   262 };
   264 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
   265     enum DwarfAttribute attr,
   266     enum DwarfForm form,
   267     uint64 data) {
   268   switch (attr) {
   269     case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break;
   270     default: break;
   271   }
   272 }
   274 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
   275     enum DwarfAttribute attr,
   276     enum DwarfForm form,
   277     uint64 data) {
   278   switch (attr) {
   279     case dwarf2reader::DW_AT_specification: {
   280       // Find the Specification to which this attribute refers, and
   281       // set specification_ appropriately. We could do more processing
   282       // here, but it's better to leave the real work to our
   283       // EndAttribute member function, at which point we know we have
   284       // seen all the DIE's attributes.
   285       FileContext *file_context = cu_context_->file_context;
   286       SpecificationByOffset *specifications
   287           = &file_context->file_private->specifications;
   288       SpecificationByOffset::iterator spec = specifications->find(data);
   289       if (spec != specifications->end()) {
   290         specification_ = &spec->second;
   291       } else {
   292         // Technically, there's no reason a DW_AT_specification
   293         // couldn't be a forward reference, but supporting that would
   294         // be a lot of work (changing to a two-pass structure), and I
   295         // don't think any producers we care about ever emit such
   296         // things.
   297         cu_context_->reporter->UnknownSpecification(offset_, data);
   298       }
   299       break;
   300     }
   301     default: break;
   302   }
   303 }
   305 string DwarfCUToModule::GenericDIEHandler::AddStringToPool(const string &str) {
   306   pair<set<string>::iterator, bool> result =
   307     cu_context_->file_context->file_private->common_strings.insert(str);
   308   return *result.first;
   309 }
   311 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
   312     enum DwarfAttribute attr,
   313     enum DwarfForm form,
   314     const string &data) {
   315   switch (attr) {
   316     case dwarf2reader::DW_AT_name:
   317       name_attribute_ = AddStringToPool(data);
   318       break;
   319     case dwarf2reader::DW_AT_MIPS_linkage_name: {
   320       char* demangled = NULL;
   321 #if !defined(__ANDROID__)
   322       demangled = abi::__cxa_demangle(data.c_str(), NULL, NULL, NULL);
   323 #endif
   324       if (demangled) {
   325         demangled_name_ = AddStringToPool(demangled);
   326         free(reinterpret_cast<void*>(demangled));
   327       }
   328       break;
   329     }
   330     default: break;
   331   }
   332 }
   334 string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
   335   // Use the demangled name, if one is available. Demangled names are
   336   // preferable to those inferred from the DWARF structure because they
   337   // include argument types.
   338   const string *qualified_name = NULL;
   339   if (!demangled_name_.empty()) {
   340     // Found it is this DIE.
   341     qualified_name = &demangled_name_;
   342   } else if (specification_ && !specification_->qualified_name.empty()) {
   343     // Found it on the specification.
   344     qualified_name = &specification_->qualified_name;
   345   }
   347   const string *unqualified_name;
   348   const string *enclosing_name;
   349   if (!qualified_name) {
   350     // Find our unqualified name. If the DIE has its own DW_AT_name
   351     // attribute, then use that; otherwise, check our specification.
   352     if (name_attribute_.empty() && specification_)
   353       unqualified_name = &specification_->unqualified_name;
   354     else
   355       unqualified_name = &name_attribute_;
   357     // Find the name of our enclosing context. If we have a
   358     // specification, it's the specification's enclosing context that
   359     // counts; otherwise, use this DIE's context.
   360     if (specification_)
   361       enclosing_name = &specification_->enclosing_name;
   362     else
   363       enclosing_name = &parent_context_->name;
   364   }
   366   // If this DIE was marked as a declaration, record its names in the
   367   // specification table.
   368   if (declaration_) {
   369     FileContext *file_context = cu_context_->file_context;
   370     Specification spec;
   371     if (qualified_name)
   372       spec.qualified_name = *qualified_name;
   373     else {
   374       spec.enclosing_name = *enclosing_name;
   375       spec.unqualified_name = *unqualified_name;
   376     }
   377     file_context->file_private->specifications[offset_] = spec;
   378   }
   380   if (qualified_name)
   381     return *qualified_name;
   383   // Combine the enclosing name and unqualified name to produce our
   384   // own fully-qualified name.
   385   return cu_context_->language->MakeQualifiedName(*enclosing_name,
   386                                                   *unqualified_name);
   387 }
   389 // A handler class for DW_TAG_subprogram DIEs.
   390 class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
   391  public:
   392   FuncHandler(CUContext *cu_context, DIEContext *parent_context,
   393               uint64 offset)
   394       : GenericDIEHandler(cu_context, parent_context, offset),
   395         low_pc_(0), high_pc_(0), high_pc_form_(dwarf2reader::DW_FORM_addr),
   396         abstract_origin_(NULL), inline_(false) { }
   397   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
   398                                 enum DwarfForm form,
   399                                 uint64 data);
   400   void ProcessAttributeSigned(enum DwarfAttribute attr,
   401                               enum DwarfForm form,
   402                               int64 data);
   403   void ProcessAttributeReference(enum DwarfAttribute attr,
   404                                  enum DwarfForm form,
   405                                  uint64 data);
   407   bool EndAttributes();
   408   void Finish();
   410  private:
   411   // The fully-qualified name, as derived from name_attribute_,
   412   // specification_, parent_context_.  Computed in EndAttributes.
   413   string name_;
   414   uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
   415   DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
   416   const AbstractOrigin* abstract_origin_;
   417   bool inline_;
   418 };
   420 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
   421     enum DwarfAttribute attr,
   422     enum DwarfForm form,
   423     uint64 data) {
   424   switch (attr) {
   425     // If this attribute is present at all --- even if its value is
   426     // DW_INL_not_inlined --- then GCC may cite it as someone else's
   427     // DW_AT_abstract_origin attribute.
   428     case dwarf2reader::DW_AT_inline:      inline_  = true; break;
   430     case dwarf2reader::DW_AT_low_pc:      low_pc_  = data; break;
   431     case dwarf2reader::DW_AT_high_pc:
   432       high_pc_form_ = form;
   433       high_pc_ = data;
   434       break;
   436     default:
   437       GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
   438       break;
   439   }
   440 }
   442 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
   443     enum DwarfAttribute attr,
   444     enum DwarfForm form,
   445     int64 data) {
   446   switch (attr) {
   447     // If this attribute is present at all --- even if its value is
   448     // DW_INL_not_inlined --- then GCC may cite it as someone else's
   449     // DW_AT_abstract_origin attribute.
   450     case dwarf2reader::DW_AT_inline:      inline_  = true; break;
   452     default:
   453       break;
   454   }
   455 }
   457 void DwarfCUToModule::FuncHandler::ProcessAttributeReference(
   458     enum DwarfAttribute attr,
   459     enum DwarfForm form,
   460     uint64 data) {
   461   switch(attr) {
   462     case dwarf2reader::DW_AT_abstract_origin: {
   463       const AbstractOriginByOffset& origins =
   464           cu_context_->file_context->file_private->origins;
   465       AbstractOriginByOffset::const_iterator origin = origins.find(data);
   466       if (origin != origins.end()) {
   467         abstract_origin_ = &(origin->second);
   468       } else {
   469         cu_context_->reporter->UnknownAbstractOrigin(offset_, data);
   470       }
   471       break;
   472     }
   473     default:
   474       GenericDIEHandler::ProcessAttributeReference(attr, form, data);
   475       break;
   476   }
   477 }
   479 bool DwarfCUToModule::FuncHandler::EndAttributes() {
   480   // Compute our name, and record a specification, if appropriate.
   481   name_ = ComputeQualifiedName();
   482   if (name_.empty() && abstract_origin_) {
   483     name_ = abstract_origin_->name;
   484   }
   485   return true;
   486 }
   488 void DwarfCUToModule::FuncHandler::Finish() {
   489   // Make high_pc_ an address, if it isn't already.
   490   if (high_pc_form_ != dwarf2reader::DW_FORM_addr) {
   491     high_pc_ += low_pc_;
   492   }
   494   // Did we collect the information we need?  Not all DWARF function
   495   // entries have low and high addresses (for example, inlined
   496   // functions that were never used), but all the ones we're
   497   // interested in cover a non-empty range of bytes.
   498   if (low_pc_ < high_pc_) {
   499     // Create a Module::Function based on the data we've gathered, and
   500     // add it to the functions_ list.
   501     Module::Function *func = new Module::Function;
   502     // Malformed DWARF may omit the name, but all Module::Functions must
   503     // have names.
   504     if (!name_.empty()) {
   505       func->name = name_;
   506     } else {
   507       cu_context_->reporter->UnnamedFunction(offset_);
   508       func->name = "<name omitted>";
   509     }
   510     func->address = low_pc_;
   511     func->size = high_pc_ - low_pc_;
   512     func->parameter_size = 0;
   513     if (func->address) {
   514        // If the function address is zero this is a sign that this function
   515        // description is just empty debug data and should just be discarded.
   516        cu_context_->functions.push_back(func);
   517      }
   518   } else if (inline_) {
   519     AbstractOrigin origin(name_);
   520     cu_context_->file_context->file_private->origins[offset_] = origin;
   521   }
   522 }
   524 // A handler for DIEs that contain functions and contribute a
   525 // component to their names: namespaces, classes, etc.
   526 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
   527  public:
   528   NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context,
   529                     uint64 offset)
   530       : GenericDIEHandler(cu_context, parent_context, offset) { }
   531   bool EndAttributes();
   532   DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag);
   534  private:
   535   DIEContext child_context_; // A context for our children.
   536 };
   538 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
   539   child_context_.name = ComputeQualifiedName();
   540   return true;
   541 }
   543 dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler(
   544     uint64 offset,
   545     enum DwarfTag tag) {
   546   switch (tag) {
   547     case dwarf2reader::DW_TAG_subprogram:
   548       return new FuncHandler(cu_context_, &child_context_, offset);
   549     case dwarf2reader::DW_TAG_namespace:
   550     case dwarf2reader::DW_TAG_class_type:
   551     case dwarf2reader::DW_TAG_structure_type:
   552     case dwarf2reader::DW_TAG_union_type:
   553       return new NamedScopeHandler(cu_context_, &child_context_, offset);
   554     default:
   555       return NULL;
   556   }
   557 }
   559 void DwarfCUToModule::WarningReporter::CUHeading() {
   560   if (printed_cu_header_)
   561     return;
   562   BPLOG(INFO)
   563     << filename_ << ": in compilation unit '" << cu_name_
   564     << "' (offset 0x" << std::setbase(16) << cu_offset_ << std::setbase(10)
   565     << "):";
   566   printed_cu_header_ = true;
   567 }
   569 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset,
   570                                                             uint64 target) {
   571   CUHeading();
   572   BPLOG(INFO)
   573     << filename_ << ": the DIE at offset 0x" 
   574     << std::setbase(16) << offset << std::setbase(10)
   575     << " has a DW_AT_specification attribute referring to the die at offset 0x"
   576     << std::setbase(16) << target << std::setbase(10)
   577     << ", which either was not marked as a declaration, or comes "
   578     << "later in the file";
   579 }
   581 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset,
   582                                                              uint64 target) {
   583   CUHeading();
   584   BPLOG(INFO)
   585     << filename_ << ": the DIE at offset 0x" 
   586     << std::setbase(16) << offset << std::setbase(10)
   587     << " has a DW_AT_abstract_origin attribute referring to the die at"
   588     << " offset 0x" << std::setbase(16) << target << std::setbase(10)
   589     << ", which either was not marked as an inline, or comes "
   590     << "later in the file";
   591 }
   593 void DwarfCUToModule::WarningReporter::MissingSection(const string &name) {
   594   CUHeading();
   595   BPLOG(INFO) << filename_ << ": warning: couldn't find DWARF '"
   596     << name << "' section";
   597 }
   599 void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) {
   600   CUHeading();
   601   BPLOG(INFO) << filename_ << ": warning: line number data offset beyond "
   602     << "end of '.debug_line' section";
   603 }
   605 void DwarfCUToModule::WarningReporter::UncoveredHeading() {
   606   if (printed_unpaired_header_)
   607     return;
   608   CUHeading();
   609   BPLOG(INFO) << filename_ << ": warning: skipping unpaired lines/functions:";
   610   printed_unpaired_header_ = true;
   611 }
   613 void DwarfCUToModule::WarningReporter::UncoveredFunction(
   614     const Module::Function &function) {
   615   if (!uncovered_warnings_enabled_)
   616     return;
   617   UncoveredHeading();
   618   BPLOG(INFO) << "    function" << (function.size == 0 ? " (zero-length)" : "")
   619     << ": " << function.name;
   620 }
   622 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) {
   623   if (!uncovered_warnings_enabled_)
   624     return;
   625   UncoveredHeading();
   626   BPLOG(INFO) << "    line" << (line.size == 0 ? " (zero-length)" : "")
   627     << ": " << line.file->name << ":" << line.number
   628     << " at 0x" << std::setbase(16) << line.address << std::setbase(10);
   629 }
   631 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64 offset) {
   632   CUHeading();
   633   BPLOG(INFO) << filename_ << ": warning: function at offset 0x"
   634     << std::setbase(16) << offset << std::setbase(10) << " has no name";
   635 }
   637 DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
   638                                  LineToModuleHandler *line_reader,
   639                                  WarningReporter *reporter)
   640     : line_reader_(line_reader), has_source_line_info_(false) { 
   641   cu_context_ = new CUContext(file_context, reporter);
   642   child_context_ = new DIEContext();
   643 }
   645 DwarfCUToModule::~DwarfCUToModule() {
   646   delete cu_context_;
   647   delete child_context_;
   648 }
   650 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
   651                                              enum DwarfForm form,
   652                                              int64 data) {
   653   switch (attr) {
   654     case dwarf2reader::DW_AT_language: // source language of this CU
   655       SetLanguage(static_cast<DwarfLanguage>(data));
   656       break;
   657     default:
   658       break;
   659   }
   660 }
   662 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
   663                                                enum DwarfForm form,
   664                                                uint64 data) {
   665   switch (attr) {
   666     case dwarf2reader::DW_AT_stmt_list: // Line number information.
   667       has_source_line_info_ = true;
   668       source_line_offset_ = data;
   669       break;
   670     case dwarf2reader::DW_AT_language: // source language of this CU
   671       SetLanguage(static_cast<DwarfLanguage>(data));
   672       break;
   673     default:
   674       break;
   675   }
   676 }
   678 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
   679                                              enum DwarfForm form,
   680                                              const string &data) {
   681   switch (attr) {
   682     case dwarf2reader::DW_AT_name:
   683       cu_context_->reporter->SetCUName(data);
   684       break;
   685     case dwarf2reader::DW_AT_comp_dir:
   686       line_reader_->StartCompilationUnit(data);
   687       break;
   688     default:
   689       break;
   690   }
   691 }
   693 bool DwarfCUToModule::EndAttributes() {
   694   return true;
   695 }
   697 dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler(
   698     uint64 offset,
   699     enum DwarfTag tag) {
   700   switch (tag) {
   701     case dwarf2reader::DW_TAG_subprogram:
   702       return new FuncHandler(cu_context_, child_context_, offset);
   703     case dwarf2reader::DW_TAG_namespace:
   704     case dwarf2reader::DW_TAG_class_type:
   705     case dwarf2reader::DW_TAG_structure_type:
   706     case dwarf2reader::DW_TAG_union_type:
   707       return new NamedScopeHandler(cu_context_, child_context_, offset);
   708     default:
   709       return NULL;
   710   }
   711 }
   713 void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
   714   switch (language) {
   715     case dwarf2reader::DW_LANG_Java:
   716       cu_context_->language = Language::Java;
   717       break;
   719     // DWARF has no generic language code for assembly language; this is
   720     // what the GNU toolchain uses.
   721     case dwarf2reader::DW_LANG_Mips_Assembler:
   722       cu_context_->language = Language::Assembler;
   723       break;
   725     // C++ covers so many cases that it probably has some way to cope
   726     // with whatever the other languages throw at us. So make it the
   727     // default.
   728     //
   729     // Objective C and Objective C++ seem to create entries for
   730     // methods whose DW_AT_name values are already fully-qualified:
   731     // "-[Classname method:]".  These appear at the top level.
   732     // 
   733     // DWARF data for C should never include namespaces or functions
   734     // nested in struct types, but if it ever does, then C++'s
   735     // notation is probably not a bad choice for that.
   736     default:
   737     case dwarf2reader::DW_LANG_ObjC:
   738     case dwarf2reader::DW_LANG_ObjC_plus_plus:
   739     case dwarf2reader::DW_LANG_C:
   740     case dwarf2reader::DW_LANG_C89:
   741     case dwarf2reader::DW_LANG_C99:
   742     case dwarf2reader::DW_LANG_C_plus_plus:
   743       cu_context_->language = Language::CPlusPlus;
   744       break;
   745   }
   746 }
   748 void DwarfCUToModule::ReadSourceLines(uint64 offset) {
   749   const dwarf2reader::SectionMap &section_map
   750       = cu_context_->file_context->section_map;
   751   dwarf2reader::SectionMap::const_iterator map_entry
   752       = section_map.find(".debug_line");
   753   // Mac OS X puts DWARF data in sections whose names begin with "__"
   754   // instead of ".".
   755   if (map_entry == section_map.end())
   756     map_entry = section_map.find("__debug_line");
   757   if (map_entry == section_map.end()) {
   758     cu_context_->reporter->MissingSection(".debug_line");
   759     return;
   760   }
   761   const char *section_start = map_entry->second.first;
   762   uint64 section_length = map_entry->second.second;
   763   if (offset >= section_length) {
   764     cu_context_->reporter->BadLineInfoOffset(offset);
   765     return;
   766   }
   767   line_reader_->ReadProgram(section_start + offset, section_length - offset,
   768                             cu_context_->file_context->module, &lines_);
   769 }
   771 namespace {
   772 // Return true if ADDRESS falls within the range of ITEM.
   773 template <class T>
   774 inline bool within(const T &item, Module::Address address) {
   775   // Because Module::Address is unsigned, and unsigned arithmetic
   776   // wraps around, this will be false if ADDRESS falls before the
   777   // start of ITEM, or if it falls after ITEM's end.
   778   return address - item.address < item.size;
   779 }
   780 }
   782 void DwarfCUToModule::AssignLinesToFunctions() {
   783   vector<Module::Function *> *functions = &cu_context_->functions;
   784   WarningReporter *reporter = cu_context_->reporter;
   786   // This would be simpler if we assumed that source line entries
   787   // don't cross function boundaries.  However, there's no real reason
   788   // to assume that (say) a series of function definitions on the same
   789   // line wouldn't get coalesced into one line number entry.  The
   790   // DWARF spec certainly makes no such promises.
   791   //
   792   // So treat the functions and lines as peers, and take the trouble
   793   // to compute their ranges' intersections precisely.  In any case,
   794   // the hair here is a constant factor for performance; the
   795   // complexity from here on out is linear.
   797   // Put both our functions and lines in order by address.
   798   std::sort(functions->begin(), functions->end(),
   799             Module::Function::CompareByAddress);
   800   std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
   802   // The last line that we used any piece of.  We use this only for
   803   // generating warnings.
   804   const Module::Line *last_line_used = NULL;
   806   // The last function and line we warned about --- so we can avoid
   807   // doing so more than once.
   808   const Module::Function *last_function_cited = NULL;
   809   const Module::Line *last_line_cited = NULL;
   811   // Make a single pass through both vectors from lower to higher
   812   // addresses, populating each Function's lines vector with lines
   813   // from our lines_ vector that fall within the function's address
   814   // range.
   815   vector<Module::Function *>::iterator func_it = functions->begin();
   816   vector<Module::Line>::const_iterator line_it = lines_.begin();
   818   Module::Address current;
   820   // Pointers to the referents of func_it and line_it, or NULL if the
   821   // iterator is at the end of the sequence.
   822   Module::Function *func;
   823   const Module::Line *line;
   825   // Start current at the beginning of the first line or function,
   826   // whichever is earlier.
   827   if (func_it != functions->end() && line_it != lines_.end()) {
   828     func = *func_it;
   829     line = &*line_it;
   830     current = std::min(func->address, line->address);
   831   } else if (line_it != lines_.end()) {
   832     func = NULL;
   833     line = &*line_it;
   834     current = line->address;
   835   } else if (func_it != functions->end()) {
   836     func = *func_it;
   837     line = NULL;
   838     current = (*func_it)->address;
   839   } else {
   840     return;
   841   }
   843   while (func || line) {
   844     // This loop has two invariants that hold at the top.
   845     //
   846     // First, at least one of the iterators is not at the end of its
   847     // sequence, and those that are not refer to the earliest
   848     // function or line that contains or starts after CURRENT.
   849     //
   850     // Note that every byte is in one of four states: it is covered
   851     // or not covered by a function, and, independently, it is
   852     // covered or not covered by a line.
   853     //
   854     // The second invariant is that CURRENT refers to a byte whose
   855     // state is different from its predecessor, or it refers to the
   856     // first byte in the address space. In other words, CURRENT is
   857     // always the address of a transition.
   858     //
   859     // Note that, although each iteration advances CURRENT from one
   860     // transition address to the next in each iteration, it might
   861     // not advance the iterators. Suppose we have a function that
   862     // starts with a line, has a gap, and then a second line, and
   863     // suppose that we enter an iteration with CURRENT at the end of
   864     // the first line. The next transition address is the start of
   865     // the second line, after the gap, so the iteration should
   866     // advance CURRENT to that point. At the head of that iteration,
   867     // the invariants require that the line iterator be pointing at
   868     // the second line. But this is also true at the head of the
   869     // next. And clearly, the iteration must not change the function
   870     // iterator. So neither iterator moves.
   872     // Assert the first invariant (see above).
   873     assert(!func || current < func->address || within(*func, current));
   874     assert(!line || current < line->address || within(*line, current));
   876     // The next transition after CURRENT.
   877     Module::Address next_transition;
   879     // Figure out which state we're in, add lines or warn, and compute
   880     // the next transition address.
   881     if (func && current >= func->address) {
   882       if (line && current >= line->address) {
   883         // Covered by both a line and a function.
   884         Module::Address func_left = func->size - (current - func->address);
   885         Module::Address line_left = line->size - (current - line->address);
   886         // This may overflow, but things work out.
   887         next_transition = current + std::min(func_left, line_left);
   888         Module::Line l = *line;
   889         l.address = current;
   890         l.size = next_transition - current;
   891         func->lines.push_back(l);
   892         last_line_used = line;
   893       } else {
   894         // Covered by a function, but no line.
   895         if (func != last_function_cited) {
   896           reporter->UncoveredFunction(*func);
   897           last_function_cited = func;
   898         }
   899         if (line && within(*func, line->address))
   900           next_transition = line->address;
   901         else
   902           // If this overflows, we'll catch it below.
   903           next_transition = func->address + func->size;
   904       }
   905     } else {
   906       if (line && current >= line->address) {
   907         // Covered by a line, but no function.
   908         //
   909         // If GCC emits padding after one function to align the start
   910         // of the next, then it will attribute the padding
   911         // instructions to the last source line of function (to reduce
   912         // the size of the line number info), but omit it from the
   913         // DW_AT_{low,high}_pc range given in .debug_info (since it
   914         // costs nothing to be precise there). If we did use at least
   915         // some of the line we're about to skip, and it ends at the
   916         // start of the next function, then assume this is what
   917         // happened, and don't warn.
   918         if (line != last_line_cited
   919             && !(func
   920                  && line == last_line_used
   921                  && func->address - line->address == line->size)) {
   922           reporter->UncoveredLine(*line);
   923           last_line_cited = line;
   924         }
   925         if (func && within(*line, func->address))
   926           next_transition = func->address;
   927         else
   928           // If this overflows, we'll catch it below.
   929           next_transition = line->address + line->size;
   930       } else {
   931         // Covered by neither a function nor a line. By the invariant,
   932         // both func and line begin after CURRENT. The next transition
   933         // is the start of the next function or next line, whichever
   934         // is earliest.
   935         assert (func || line);
   936         if (func && line)
   937           next_transition = std::min(func->address, line->address);
   938         else if (func)
   939           next_transition = func->address;
   940         else
   941           next_transition = line->address;
   942       }
   943     }
   945     // If a function or line abuts the end of the address space, then
   946     // next_transition may end up being zero, in which case we've completed
   947     // our pass. Handle that here, instead of trying to deal with it in
   948     // each place we compute next_transition.
   949     if (!next_transition)
   950       break;
   952     // Advance iterators as needed. If lines overlap or functions overlap,
   953     // then we could go around more than once. We don't worry too much
   954     // about what result we produce in that case, just as long as we don't
   955     // hang or crash.
   956     while (func_it != functions->end()
   957            && next_transition >= (*func_it)->address
   958            && !within(**func_it, next_transition))
   959       func_it++;
   960     func = (func_it != functions->end()) ? *func_it : NULL;
   961     while (line_it != lines_.end()
   962            && next_transition >= line_it->address
   963            && !within(*line_it, next_transition))
   964       line_it++;
   965     line = (line_it != lines_.end()) ? &*line_it : NULL;
   967     // We must make progress.
   968     assert(next_transition > current);
   969     current = next_transition;
   970   }
   971 }
   973 void DwarfCUToModule::Finish() {
   974   // Assembly language files have no function data, and that gives us
   975   // no place to store our line numbers (even though the GNU toolchain
   976   // will happily produce source line info for assembly language
   977   // files).  To avoid spurious warnings about lines we can't assign
   978   // to functions, skip CUs in languages that lack functions.
   979   if (!cu_context_->language->HasFunctions())
   980     return;
   982   // Read source line info, if we have any.
   983   if (has_source_line_info_)
   984     ReadSourceLines(source_line_offset_);
   986   vector<Module::Function *> *functions = &cu_context_->functions;
   988   // Dole out lines to the appropriate functions.
   989   AssignLinesToFunctions();
   991   // Add our functions, which now have source lines assigned to them,
   992   // to module_.
   993   cu_context_->file_context->module->AddFunctions(functions->begin(),
   994                                                   functions->end());
   996   // Ownership of the function objects has shifted from cu_context to
   997   // the Module.
   998   functions->clear();
   999 }
  1001 bool DwarfCUToModule::StartCompilationUnit(uint64 offset,
  1002                                            uint8 address_size,
  1003                                            uint8 offset_size,
  1004                                            uint64 cu_length,
  1005                                            uint8 dwarf_version) {
  1006   return dwarf_version >= 2;
  1009 bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag) {
  1010   // We don't deal with partial compilation units (the only other tag
  1011   // likely to be used for root DIE).
  1012   return tag == dwarf2reader::DW_TAG_compile_unit;
  1015 } // namespace google_breakpad

mercurial