michael@0: // Copyright (c) 2010 Google Inc. All Rights Reserved.
michael@0: //
michael@0: // Redistribution and use in source and binary forms, with or without
michael@0: // modification, are permitted provided that the following conditions are
michael@0: // met:
michael@0: //
michael@0: //     * Redistributions of source code must retain the above copyright
michael@0: // notice, this list of conditions and the following disclaimer.
michael@0: //     * Redistributions in binary form must reproduce the above
michael@0: // copyright notice, this list of conditions and the following disclaimer
michael@0: // in the documentation and/or other materials provided with the
michael@0: // distribution.
michael@0: //     * Neither the name of Google Inc. nor the names of its
michael@0: // contributors may be used to endorse or promote products derived from
michael@0: // this software without specific prior written permission.
michael@0: //
michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: 
michael@0: // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
michael@0: 
michael@0: // This file implements the google_breakpad::StabsReader class.
michael@0: // See stabs_reader.h.
michael@0: 
michael@0: #include "common/stabs_reader.h"
michael@0: 
michael@0: #include <assert.h>
michael@0: #include <stab.h>
michael@0: #include <string.h>
michael@0: 
michael@0: #include <string>
michael@0: 
michael@0: #include "common/using_std_string.h"
michael@0: 
michael@0: using std::vector;
michael@0: 
michael@0: namespace google_breakpad {
michael@0: 
michael@0: StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
michael@0:                                           bool big_endian, size_t value_size)
michael@0:     : value_size_(value_size), cursor_(buffer, big_endian) {
michael@0:   // Actually, we could handle weird sizes just fine, but they're
michael@0:   // probably mistakes --- expressed in bits, say.
michael@0:   assert(value_size == 4 || value_size == 8);
michael@0:   entry_.index = 0;
michael@0:   Fetch();
michael@0: }
michael@0: 
michael@0: void StabsReader::EntryIterator::Fetch() {
michael@0:   cursor_
michael@0:       .Read(4, false, &entry_.name_offset)
michael@0:       .Read(1, false, &entry_.type)
michael@0:       .Read(1, false, &entry_.other)
michael@0:       .Read(2, false, &entry_.descriptor)
michael@0:       .Read(value_size_, false, &entry_.value);
michael@0:   entry_.at_end = !cursor_;
michael@0: }
michael@0: 
michael@0: StabsReader::StabsReader(const uint8_t *stab,    size_t stab_size,
michael@0:                          const uint8_t *stabstr, size_t stabstr_size,
michael@0:                          bool big_endian, size_t value_size, bool unitized,
michael@0:                          StabsHandler *handler)
michael@0:     : entries_(stab, stab_size),
michael@0:       strings_(stabstr, stabstr_size),
michael@0:       iterator_(&entries_, big_endian, value_size),
michael@0:       unitized_(unitized),
michael@0:       handler_(handler),
michael@0:       string_offset_(0),
michael@0:       next_cu_string_offset_(0),
michael@0:       current_source_file_(NULL) { }
michael@0: 
michael@0: const char *StabsReader::SymbolString() {
michael@0:   ptrdiff_t offset = string_offset_ + iterator_->name_offset;
michael@0:   if (offset < 0 || (size_t) offset >= strings_.Size()) {
michael@0:     handler_->Warning("symbol %d: name offset outside the string section\n",
michael@0:                       iterator_->index);
michael@0:     // Return our null string, to keep our promise about all names being
michael@0:     // taken from the string section.
michael@0:     offset = 0;
michael@0:   }
michael@0:   return reinterpret_cast<const char *>(strings_.start + offset);
michael@0: }
michael@0: 
michael@0: bool StabsReader::Process() {
michael@0:   while (!iterator_->at_end) {
michael@0:     if (iterator_->type == N_SO) {
michael@0:       if (! ProcessCompilationUnit())
michael@0:         return false;
michael@0:     } else if (iterator_->type == N_UNDF && unitized_) {
michael@0:       // In unitized STABS (including Linux STABS, and pretty much anything
michael@0:       // else that puts STABS data in sections), at the head of each
michael@0:       // compilation unit's entries there is an N_UNDF stab giving the
michael@0:       // number of symbols in the compilation unit, and the number of bytes
michael@0:       // that compilation unit's strings take up in the .stabstr section.
michael@0:       // Each CU's strings are separate; the n_strx values are offsets
michael@0:       // within the current CU's portion of the .stabstr section.
michael@0:       //
michael@0:       // As an optimization, the GNU linker combines all the
michael@0:       // compilation units into one, with a single N_UNDF at the
michael@0:       // beginning. However, other linkers, like Gold, do not perform
michael@0:       // this optimization.
michael@0:       string_offset_ = next_cu_string_offset_;
michael@0:       next_cu_string_offset_ = iterator_->value;
michael@0:       ++iterator_;
michael@0:     }
michael@0: #if defined(HAVE_MACH_O_NLIST_H)
michael@0:     // Export symbols in Mach-O binaries look like this.
michael@0:     // This is necessary in order to be able to dump symbols
michael@0:     // from OS X system libraries.
michael@0:     else if ((iterator_->type & N_STAB) == 0 &&
michael@0:                (iterator_->type & N_TYPE) == N_SECT) {
michael@0:       ProcessExtern();
michael@0:     }
michael@0: #endif
michael@0:     else {
michael@0:       ++iterator_;
michael@0:     }
michael@0:   }
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: bool StabsReader::ProcessCompilationUnit() {
michael@0:   assert(!iterator_->at_end && iterator_->type == N_SO);
michael@0: 
michael@0:   // There may be an N_SO entry whose name ends with a slash,
michael@0:   // indicating the directory in which the compilation occurred.
michael@0:   // The build directory defaults to NULL.
michael@0:   const char *build_directory = NULL;
michael@0:   {
michael@0:     const char *name = SymbolString();
michael@0:     if (name[0] && name[strlen(name) - 1] == '/') {
michael@0:       build_directory = name;
michael@0:       ++iterator_;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // We expect to see an N_SO entry with a filename next, indicating
michael@0:   // the start of the compilation unit.
michael@0:   {
michael@0:     if (iterator_->at_end || iterator_->type != N_SO)
michael@0:       return true;
michael@0:     const char *name = SymbolString();
michael@0:     if (name[0] == '\0') {
michael@0:       // This seems to be a stray end-of-compilation-unit marker;
michael@0:       // consume it, but don't report the end, since we didn't see a
michael@0:       // beginning.
michael@0:       ++iterator_;
michael@0:       return true;
michael@0:     }
michael@0:     current_source_file_ = name;
michael@0:   }
michael@0: 
michael@0:   if (! handler_->StartCompilationUnit(current_source_file_,
michael@0:                                        iterator_->value,
michael@0:                                        build_directory))
michael@0:     return false;
michael@0: 
michael@0:   ++iterator_;
michael@0: 
michael@0:   // The STABS documentation says that some compilers may emit
michael@0:   // additional N_SO entries with names immediately following the
michael@0:   // first, and that they should be ignored.  However, the original
michael@0:   // Breakpad STABS reader doesn't ignore them, so we won't either.
michael@0: 
michael@0:   // Process the body of the compilation unit, up to the next N_SO.
michael@0:   while (!iterator_->at_end && iterator_->type != N_SO) {
michael@0:     if (iterator_->type == N_FUN) {
michael@0:       if (! ProcessFunction())
michael@0:         return false;
michael@0:     } else if (iterator_->type == N_SLINE) {
michael@0:       // Mac OS X STABS place SLINE records before functions.
michael@0:       Line line;
michael@0:       // The value of an N_SLINE entry that appears outside a function is
michael@0:       // the absolute address of the line.
michael@0:       line.address = iterator_->value;
michael@0:       line.filename = current_source_file_;
michael@0:       // The n_desc of a N_SLINE entry is the line number.  It's a
michael@0:       // signed 16-bit field; line numbers from 32768 to 65535 are
michael@0:       // stored as n-65536.
michael@0:       line.number = (uint16_t) iterator_->descriptor;
michael@0:       queued_lines_.push_back(line);
michael@0:       ++iterator_;
michael@0:     } else if (iterator_->type == N_SOL) {
michael@0:       current_source_file_ = SymbolString();
michael@0:       ++iterator_;
michael@0:     } else {
michael@0:       // Ignore anything else.
michael@0:       ++iterator_;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // An N_SO with an empty name indicates the end of the compilation
michael@0:   // unit.  Default to zero.
michael@0:   uint64_t ending_address = 0;
michael@0:   if (!iterator_->at_end) {
michael@0:     assert(iterator_->type == N_SO);
michael@0:     const char *name = SymbolString();
michael@0:     if (name[0] == '\0') {
michael@0:       ending_address = iterator_->value;
michael@0:       ++iterator_;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (! handler_->EndCompilationUnit(ending_address))
michael@0:     return false;
michael@0: 
michael@0:   queued_lines_.clear();
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: bool StabsReader::ProcessFunction() {
michael@0:   assert(!iterator_->at_end && iterator_->type == N_FUN);
michael@0: 
michael@0:   uint64_t function_address = iterator_->value;
michael@0:   // The STABS string for an N_FUN entry is the name of the function,
michael@0:   // followed by a colon, followed by type information for the
michael@0:   // function.  We want to pass the name alone to StartFunction.
michael@0:   const char *stab_string = SymbolString();
michael@0:   const char *name_end = strchr(stab_string, ':');
michael@0:   if (! name_end)
michael@0:     name_end = stab_string + strlen(stab_string);
michael@0:   string name(stab_string, name_end - stab_string);
michael@0:   if (! handler_->StartFunction(name, function_address))
michael@0:     return false;
michael@0:   ++iterator_;
michael@0: 
michael@0:   // If there were any SLINE records given before the function, report them now.
michael@0:   for (vector<Line>::const_iterator it = queued_lines_.begin();
michael@0:        it != queued_lines_.end(); it++) {
michael@0:     if (!handler_->Line(it->address, it->filename, it->number))
michael@0:       return false;
michael@0:   }
michael@0:   queued_lines_.clear();
michael@0: 
michael@0:   while (!iterator_->at_end) {
michael@0:     if (iterator_->type == N_SO || iterator_->type == N_FUN)
michael@0:       break;
michael@0:     else if (iterator_->type == N_SLINE) {
michael@0:       // The value of an N_SLINE entry is the offset of the line from
michael@0:       // the function's start address.
michael@0:       uint64_t line_address = function_address + iterator_->value;
michael@0:       // The n_desc of a N_SLINE entry is the line number.  It's a
michael@0:       // signed 16-bit field; line numbers from 32768 to 65535 are
michael@0:       // stored as n-65536.
michael@0:       uint16_t line_number = iterator_->descriptor;
michael@0:       if (! handler_->Line(line_address, current_source_file_, line_number))
michael@0:         return false;
michael@0:       ++iterator_;
michael@0:     } else if (iterator_->type == N_SOL) {
michael@0:       current_source_file_ = SymbolString();
michael@0:       ++iterator_;
michael@0:     } else
michael@0:       // Ignore anything else.
michael@0:       ++iterator_;
michael@0:   }
michael@0: 
michael@0:   // We've reached the end of the function. See if we can figure out its
michael@0:   // ending address.
michael@0:   uint64_t ending_address = 0;
michael@0:   if (!iterator_->at_end) {
michael@0:     assert(iterator_->type == N_SO || iterator_->type == N_FUN);
michael@0:     if (iterator_->type == N_FUN) {
michael@0:       const char *symbol_name = SymbolString();
michael@0:       if (symbol_name[0] == '\0') {
michael@0:         // An N_FUN entry with no name is a terminator for this function;
michael@0:         // its value is the function's size.
michael@0:         ending_address = function_address + iterator_->value;
michael@0:         ++iterator_;
michael@0:       } else {
michael@0:         // An N_FUN entry with a name is the next function, and we can take
michael@0:         // its value as our ending address. Don't advance the iterator, as
michael@0:         // we'll use this symbol to start the next function as well.
michael@0:         ending_address = iterator_->value;
michael@0:       }
michael@0:     } else {
michael@0:       // An N_SO entry could be an end-of-compilation-unit marker, or the
michael@0:       // start of the next compilation unit, but in either case, its value
michael@0:       // is our ending address. We don't advance the iterator;
michael@0:       // ProcessCompilationUnit will decide what to do with this symbol.
michael@0:       ending_address = iterator_->value;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (! handler_->EndFunction(ending_address))
michael@0:     return false;
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: bool StabsReader::ProcessExtern() {
michael@0: #if defined(HAVE_MACH_O_NLIST_H)
michael@0:   assert(!iterator_->at_end &&
michael@0:          (iterator_->type & N_STAB) == 0 &&
michael@0:          (iterator_->type & N_TYPE) == N_SECT);
michael@0: #endif
michael@0: 
michael@0:   // TODO(mark): only do symbols in the text section?
michael@0:   if (!handler_->Extern(SymbolString(), iterator_->value))
michael@0:     return false;
michael@0: 
michael@0:   ++iterator_;
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: } // namespace google_breakpad