// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy

// This file implements the google_breakpad::StabsReader class.
michael@0: // See stabs_reader.h. michael@0: michael@0: #include "common/stabs_reader.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include michael@0: michael@0: #include "common/using_std_string.h" michael@0: michael@0: using std::vector; michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer, michael@0: bool big_endian, size_t value_size) michael@0: : value_size_(value_size), cursor_(buffer, big_endian) { michael@0: // Actually, we could handle weird sizes just fine, but they're michael@0: // probably mistakes --- expressed in bits, say. michael@0: assert(value_size == 4 || value_size == 8); michael@0: entry_.index = 0; michael@0: Fetch(); michael@0: } michael@0: michael@0: void StabsReader::EntryIterator::Fetch() { michael@0: cursor_ michael@0: .Read(4, false, &entry_.name_offset) michael@0: .Read(1, false, &entry_.type) michael@0: .Read(1, false, &entry_.other) michael@0: .Read(2, false, &entry_.descriptor) michael@0: .Read(value_size_, false, &entry_.value); michael@0: entry_.at_end = !cursor_; michael@0: } michael@0: michael@0: StabsReader::StabsReader(const uint8_t *stab, size_t stab_size, michael@0: const uint8_t *stabstr, size_t stabstr_size, michael@0: bool big_endian, size_t value_size, bool unitized, michael@0: StabsHandler *handler) michael@0: : entries_(stab, stab_size), michael@0: strings_(stabstr, stabstr_size), michael@0: iterator_(&entries_, big_endian, value_size), michael@0: unitized_(unitized), michael@0: handler_(handler), michael@0: string_offset_(0), michael@0: next_cu_string_offset_(0), michael@0: current_source_file_(NULL) { } michael@0: michael@0: const char *StabsReader::SymbolString() { michael@0: ptrdiff_t offset = string_offset_ + iterator_->name_offset; michael@0: if (offset < 0 || (size_t) offset >= strings_.Size()) { michael@0: handler_->Warning("symbol %d: name offset outside the string section\n", 
michael@0: iterator_->index); michael@0: // Return our null string, to keep our promise about all names being michael@0: // taken from the string section. michael@0: offset = 0; michael@0: } michael@0: return reinterpret_cast(strings_.start + offset); michael@0: } michael@0: michael@0: bool StabsReader::Process() { michael@0: while (!iterator_->at_end) { michael@0: if (iterator_->type == N_SO) { michael@0: if (! ProcessCompilationUnit()) michael@0: return false; michael@0: } else if (iterator_->type == N_UNDF && unitized_) { michael@0: // In unitized STABS (including Linux STABS, and pretty much anything michael@0: // else that puts STABS data in sections), at the head of each michael@0: // compilation unit's entries there is an N_UNDF stab giving the michael@0: // number of symbols in the compilation unit, and the number of bytes michael@0: // that compilation unit's strings take up in the .stabstr section. michael@0: // Each CU's strings are separate; the n_strx values are offsets michael@0: // within the current CU's portion of the .stabstr section. michael@0: // michael@0: // As an optimization, the GNU linker combines all the michael@0: // compilation units into one, with a single N_UNDF at the michael@0: // beginning. However, other linkers, like Gold, do not perform michael@0: // this optimization. michael@0: string_offset_ = next_cu_string_offset_; michael@0: next_cu_string_offset_ = iterator_->value; michael@0: ++iterator_; michael@0: } michael@0: #if defined(HAVE_MACH_O_NLIST_H) michael@0: // Export symbols in Mach-O binaries look like this. michael@0: // This is necessary in order to be able to dump symbols michael@0: // from OS X system libraries. 
michael@0: else if ((iterator_->type & N_STAB) == 0 && michael@0: (iterator_->type & N_TYPE) == N_SECT) { michael@0: ProcessExtern(); michael@0: } michael@0: #endif michael@0: else { michael@0: ++iterator_; michael@0: } michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: bool StabsReader::ProcessCompilationUnit() { michael@0: assert(!iterator_->at_end && iterator_->type == N_SO); michael@0: michael@0: // There may be an N_SO entry whose name ends with a slash, michael@0: // indicating the directory in which the compilation occurred. michael@0: // The build directory defaults to NULL. michael@0: const char *build_directory = NULL; michael@0: { michael@0: const char *name = SymbolString(); michael@0: if (name[0] && name[strlen(name) - 1] == '/') { michael@0: build_directory = name; michael@0: ++iterator_; michael@0: } michael@0: } michael@0: michael@0: // We expect to see an N_SO entry with a filename next, indicating michael@0: // the start of the compilation unit. michael@0: { michael@0: if (iterator_->at_end || iterator_->type != N_SO) michael@0: return true; michael@0: const char *name = SymbolString(); michael@0: if (name[0] == '\0') { michael@0: // This seems to be a stray end-of-compilation-unit marker; michael@0: // consume it, but don't report the end, since we didn't see a michael@0: // beginning. michael@0: ++iterator_; michael@0: return true; michael@0: } michael@0: current_source_file_ = name; michael@0: } michael@0: michael@0: if (! handler_->StartCompilationUnit(current_source_file_, michael@0: iterator_->value, michael@0: build_directory)) michael@0: return false; michael@0: michael@0: ++iterator_; michael@0: michael@0: // The STABS documentation says that some compilers may emit michael@0: // additional N_SO entries with names immediately following the michael@0: // first, and that they should be ignored. However, the original michael@0: // Breakpad STABS reader doesn't ignore them, so we won't either. 
michael@0: michael@0: // Process the body of the compilation unit, up to the next N_SO. michael@0: while (!iterator_->at_end && iterator_->type != N_SO) { michael@0: if (iterator_->type == N_FUN) { michael@0: if (! ProcessFunction()) michael@0: return false; michael@0: } else if (iterator_->type == N_SLINE) { michael@0: // Mac OS X STABS place SLINE records before functions. michael@0: Line line; michael@0: // The value of an N_SLINE entry that appears outside a function is michael@0: // the absolute address of the line. michael@0: line.address = iterator_->value; michael@0: line.filename = current_source_file_; michael@0: // The n_desc of a N_SLINE entry is the line number. It's a michael@0: // signed 16-bit field; line numbers from 32768 to 65535 are michael@0: // stored as n-65536. michael@0: line.number = (uint16_t) iterator_->descriptor; michael@0: queued_lines_.push_back(line); michael@0: ++iterator_; michael@0: } else if (iterator_->type == N_SOL) { michael@0: current_source_file_ = SymbolString(); michael@0: ++iterator_; michael@0: } else { michael@0: // Ignore anything else. michael@0: ++iterator_; michael@0: } michael@0: } michael@0: michael@0: // An N_SO with an empty name indicates the end of the compilation michael@0: // unit. Default to zero. michael@0: uint64_t ending_address = 0; michael@0: if (!iterator_->at_end) { michael@0: assert(iterator_->type == N_SO); michael@0: const char *name = SymbolString(); michael@0: if (name[0] == '\0') { michael@0: ending_address = iterator_->value; michael@0: ++iterator_; michael@0: } michael@0: } michael@0: michael@0: if (! 
handler_->EndCompilationUnit(ending_address)) michael@0: return false; michael@0: michael@0: queued_lines_.clear(); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool StabsReader::ProcessFunction() { michael@0: assert(!iterator_->at_end && iterator_->type == N_FUN); michael@0: michael@0: uint64_t function_address = iterator_->value; michael@0: // The STABS string for an N_FUN entry is the name of the function, michael@0: // followed by a colon, followed by type information for the michael@0: // function. We want to pass the name alone to StartFunction. michael@0: const char *stab_string = SymbolString(); michael@0: const char *name_end = strchr(stab_string, ':'); michael@0: if (! name_end) michael@0: name_end = stab_string + strlen(stab_string); michael@0: string name(stab_string, name_end - stab_string); michael@0: if (! handler_->StartFunction(name, function_address)) michael@0: return false; michael@0: ++iterator_; michael@0: michael@0: // If there were any SLINE records given before the function, report them now. michael@0: for (vector::const_iterator it = queued_lines_.begin(); michael@0: it != queued_lines_.end(); it++) { michael@0: if (!handler_->Line(it->address, it->filename, it->number)) michael@0: return false; michael@0: } michael@0: queued_lines_.clear(); michael@0: michael@0: while (!iterator_->at_end) { michael@0: if (iterator_->type == N_SO || iterator_->type == N_FUN) michael@0: break; michael@0: else if (iterator_->type == N_SLINE) { michael@0: // The value of an N_SLINE entry is the offset of the line from michael@0: // the function's start address. michael@0: uint64_t line_address = function_address + iterator_->value; michael@0: // The n_desc of a N_SLINE entry is the line number. It's a michael@0: // signed 16-bit field; line numbers from 32768 to 65535 are michael@0: // stored as n-65536. michael@0: uint16_t line_number = iterator_->descriptor; michael@0: if (! 
handler_->Line(line_address, current_source_file_, line_number)) michael@0: return false; michael@0: ++iterator_; michael@0: } else if (iterator_->type == N_SOL) { michael@0: current_source_file_ = SymbolString(); michael@0: ++iterator_; michael@0: } else michael@0: // Ignore anything else. michael@0: ++iterator_; michael@0: } michael@0: michael@0: // We've reached the end of the function. See if we can figure out its michael@0: // ending address. michael@0: uint64_t ending_address = 0; michael@0: if (!iterator_->at_end) { michael@0: assert(iterator_->type == N_SO || iterator_->type == N_FUN); michael@0: if (iterator_->type == N_FUN) { michael@0: const char *symbol_name = SymbolString(); michael@0: if (symbol_name[0] == '\0') { michael@0: // An N_FUN entry with no name is a terminator for this function; michael@0: // its value is the function's size. michael@0: ending_address = function_address + iterator_->value; michael@0: ++iterator_; michael@0: } else { michael@0: // An N_FUN entry with a name is the next function, and we can take michael@0: // its value as our ending address. Don't advance the iterator, as michael@0: // we'll use this symbol to start the next function as well. michael@0: ending_address = iterator_->value; michael@0: } michael@0: } else { michael@0: // An N_SO entry could be an end-of-compilation-unit marker, or the michael@0: // start of the next compilation unit, but in either case, its value michael@0: // is our ending address. We don't advance the iterator; michael@0: // ProcessCompilationUnit will decide what to do with this symbol. michael@0: ending_address = iterator_->value; michael@0: } michael@0: } michael@0: michael@0: if (! 
handler_->EndFunction(ending_address)) michael@0: return false; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool StabsReader::ProcessExtern() { michael@0: #if defined(HAVE_MACH_O_NLIST_H) michael@0: assert(!iterator_->at_end && michael@0: (iterator_->type & N_STAB) == 0 && michael@0: (iterator_->type & N_TYPE) == N_SECT); michael@0: #endif michael@0: michael@0: // TODO(mark): only do symbols in the text section? michael@0: if (!handler_->Extern(SymbolString(), iterator_->value)) michael@0: return false; michael@0: michael@0: ++iterator_; michael@0: return true; michael@0: } michael@0: michael@0: } // namespace google_breakpad