// -*- mode: c++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: // Original author: Jim Blandy michael@0: michael@0: // stabs_reader.h: Define StabsReader, a parser for STABS debugging michael@0: // information. A description of the STABS debugging format can be michael@0: // found at: michael@0: // michael@0: // http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html michael@0: // michael@0: // The comments here assume you understand the format. michael@0: // michael@0: // This parser can handle big-endian and little-endian data, and the symbol michael@0: // values may be either 32 or 64 bits long. It handles both STABS in michael@0: // sections (as used on Linux) and STABS appearing directly in an michael@0: // a.out-like symbol table (as used in Darwin OS X Mach-O files). michael@0: michael@0: #ifndef COMMON_STABS_READER_H__ michael@0: #define COMMON_STABS_READER_H__ michael@0: michael@0: #include michael@0: #include michael@0: michael@0: #ifdef HAVE_CONFIG_H michael@0: #include michael@0: #endif michael@0: michael@0: #ifdef HAVE_A_OUT_H michael@0: #include michael@0: #endif michael@0: #ifdef HAVE_MACH_O_NLIST_H michael@0: #include michael@0: #endif michael@0: michael@0: #include michael@0: #include michael@0: michael@0: #include "common/byte_cursor.h" michael@0: #include "common/using_std_string.h" michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: class StabsHandler; michael@0: michael@0: class StabsReader { michael@0: public: michael@0: // Create a reader for the STABS debug information whose .stab section is michael@0: // being traversed by ITERATOR, and whose .stabstr section is referred to michael@0: // by STRINGS. The reader will call the member functions of HANDLER to michael@0: // report the information it finds, when the reader's 'Process' member michael@0: // function is called. michael@0: // michael@0: // BIG_ENDIAN should be true if the entries in the .stab section are in michael@0: // big-endian form, or false if they are in little-endian form. 
michael@0: // michael@0: // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value' michael@0: // field in each entry in bytes. michael@0: // michael@0: // UNITIZED should be true if the STABS data is stored in units with michael@0: // N_UNDF headers. This is usually the case for STABS stored in sections, michael@0: // like .stab/.stabstr, and usually not the case for STABS stored in the michael@0: // actual symbol table; UNITIZED should be true when parsing Linux stabs, michael@0: // false when parsing Mac OS X STABS. For details, see: michael@0: // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html michael@0: // michael@0: // Note that, in ELF, the .stabstr section should be found using the michael@0: // 'sh_link' field of the .stab section header, not by name. michael@0: StabsReader(const uint8_t *stab, size_t stab_size, michael@0: const uint8_t *stabstr, size_t stabstr_size, michael@0: bool big_endian, size_t value_size, bool unitized, michael@0: StabsHandler *handler); michael@0: michael@0: // Process the STABS data, calling the handler's member functions to michael@0: // report what we find. While the handler functions return true, michael@0: // continue to process until we reach the end of the section. If we michael@0: // processed the entire section and all handlers returned true, michael@0: // return true. If any handler returned false, return false. michael@0: // michael@0: // This is only meant to be called once per StabsReader instance; michael@0: // resuming a prior processing pass that stopped abruptly isn't supported. michael@0: bool Process(); michael@0: michael@0: private: michael@0: michael@0: // An class for walking arrays of STABS entries. This isolates the main michael@0: // STABS reader from the exact format (size; endianness) of the entries michael@0: // themselves. 
michael@0: class EntryIterator { michael@0: public: michael@0: // The contents of a STABS entry, adjusted for the host's endianness, michael@0: // word size, 'struct nlist' layout, and so on. michael@0: struct Entry { michael@0: // True if this iterator has reached the end of the entry array. When michael@0: // this is set, the other members of this structure are not valid. michael@0: bool at_end; michael@0: michael@0: // The number of this entry within the list. michael@0: size_t index; michael@0: michael@0: // The current entry's name offset. This is the offset within the michael@0: // current compilation unit's strings, as establish by the N_UNDF entries. michael@0: size_t name_offset; michael@0: michael@0: // The current entry's type, 'other' field, descriptor, and value. michael@0: unsigned char type; michael@0: unsigned char other; michael@0: short descriptor; michael@0: uint64_t value; michael@0: }; michael@0: michael@0: // Create a EntryIterator walking the entries in BUFFER. Treat the michael@0: // entries as big-endian if BIG_ENDIAN is true, as little-endian michael@0: // otherwise. Assume each entry has a 'value' field whose size is michael@0: // VALUE_SIZE. michael@0: // michael@0: // This would not be terribly clean to extend to other format variations, michael@0: // but it's enough to handle Linux and Mac, and we'd like STABS to die michael@0: // anyway. michael@0: // michael@0: // For the record: on Linux, STABS entry values are always 32 bits, michael@0: // regardless of the architecture address size (don't ask me why); on michael@0: // Mac, they are 32 or 64 bits long. Oddly, the section header's entry michael@0: // size for a Linux ELF .stab section varies according to the ELF class michael@0: // from 12 to 20 even as the actual entries remain unchanged. michael@0: EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size); michael@0: michael@0: // Move to the next entry. 
This function's behavior is undefined if michael@0: // at_end() is true when it is called. michael@0: EntryIterator &operator++() { Fetch(); entry_.index++; return *this; } michael@0: michael@0: // Dereferencing this iterator produces a reference to an Entry structure michael@0: // that holds the current entry's values. The entry is owned by this michael@0: // EntryIterator, and will be invalidated at the next call to operator++. michael@0: const Entry &operator*() const { return entry_; } michael@0: const Entry *operator->() const { return &entry_; } michael@0: michael@0: private: michael@0: // Read the STABS entry at cursor_, and set entry_ appropriately. michael@0: void Fetch(); michael@0: michael@0: // The size of entries' value field, in bytes. michael@0: size_t value_size_; michael@0: michael@0: // A byte cursor traversing buffer_. michael@0: ByteCursor cursor_; michael@0: michael@0: // Values for the entry this iterator refers to. michael@0: Entry entry_; michael@0: }; michael@0: michael@0: // A source line, saved to be reported later. michael@0: struct Line { michael@0: uint64_t address; michael@0: const char *filename; michael@0: int number; michael@0: }; michael@0: michael@0: // Return the name of the current symbol. michael@0: const char *SymbolString(); michael@0: michael@0: // Process a compilation unit starting at symbol_. Return true michael@0: // to continue processing, or false to abort. michael@0: bool ProcessCompilationUnit(); michael@0: michael@0: // Process a function in current_source_file_ starting at symbol_. michael@0: // Return true to continue processing, or false to abort. michael@0: bool ProcessFunction(); michael@0: michael@0: // Process an exported function symbol. michael@0: // Return true to continue processing, or false to abort. michael@0: bool ProcessExtern(); michael@0: michael@0: // The STABS entries being parsed. michael@0: ByteBuffer entries_; michael@0: michael@0: // The string section to which the entries refer. 
michael@0: ByteBuffer strings_; michael@0: michael@0: // The iterator walking the STABS entries. michael@0: EntryIterator iterator_; michael@0: michael@0: // True if the data is "unitized"; see the explanation in the comment for michael@0: // StabsReader::StabsReader. michael@0: bool unitized_; michael@0: michael@0: StabsHandler *handler_; michael@0: michael@0: // The offset of the current compilation unit's strings within stabstr_. michael@0: size_t string_offset_; michael@0: michael@0: // The value string_offset_ should have for the next compilation unit, michael@0: // as established by N_UNDF entries. michael@0: size_t next_cu_string_offset_; michael@0: michael@0: // The current source file name. michael@0: const char *current_source_file_; michael@0: michael@0: // Mac OS X STABS place SLINE records before functions; we accumulate a michael@0: // vector of these until we see the FUN record, and then report them michael@0: // after the StartFunction call. michael@0: std::vector queued_lines_; michael@0: }; michael@0: michael@0: // Consumer-provided callback structure for the STABS reader. Clients michael@0: // of the STABS reader provide an instance of this structure. The michael@0: // reader then invokes the member functions of that instance to report michael@0: // the information it finds. michael@0: // michael@0: // The default definitions of the member functions do nothing, and return michael@0: // true so processing will continue. michael@0: class StabsHandler { michael@0: public: michael@0: StabsHandler() { } michael@0: virtual ~StabsHandler() { } michael@0: michael@0: // Some general notes about the handler callback functions: michael@0: michael@0: // Processing proceeds until the end of the .stabs section, or until michael@0: // one of these functions returns false. 
michael@0: michael@0: // The addresses given are as reported in the STABS info, without michael@0: // regard for whether the module may be loaded at different michael@0: // addresses at different times (a shared library, say). When michael@0: // processing STABS from an ELF shared library, the addresses given michael@0: // all assume the library is loaded at its nominal load address. michael@0: // They are *not* offsets from the nominal load address. If you michael@0: // want offsets, you must subtract off the library's nominal load michael@0: // address. michael@0: michael@0: // The arguments to these functions named FILENAME are all michael@0: // references to strings stored in the .stabstr section. Because michael@0: // both the Linux and Solaris linkers factor out duplicate strings michael@0: // from the .stabstr section, the consumer can assume that if two michael@0: // FILENAME values are different addresses, they represent different michael@0: // file names. michael@0: // michael@0: // Thus, it's safe to use (say) std::map, which does michael@0: // string address comparisons, not string content comparisons. michael@0: // Since all the strings are in same array of characters --- the michael@0: // .stabstr section --- comparing their addresses produces michael@0: // predictable, if not lexicographically meaningful, results. michael@0: michael@0: // Begin processing a compilation unit whose main source file is michael@0: // named FILENAME, and whose base address is ADDRESS. If michael@0: // BUILD_DIRECTORY is non-NULL, it is the name of the build michael@0: // directory in which the compilation occurred. michael@0: virtual bool StartCompilationUnit(const char *filename, uint64_t address, michael@0: const char *build_directory) { michael@0: return true; michael@0: } michael@0: michael@0: // Finish processing the compilation unit. If ADDRESS is non-zero, michael@0: // it is the ending address of the compilation unit. 
If ADDRESS is michael@0: // zero, then the compilation unit's ending address is not michael@0: // available, and the consumer must infer it by other means. michael@0: virtual bool EndCompilationUnit(uint64_t address) { return true; } michael@0: michael@0: // Begin processing a function named NAME, whose starting address is michael@0: // ADDRESS. This function belongs to the compilation unit that was michael@0: // most recently started but not ended. michael@0: // michael@0: // Note that, unlike filenames, NAME is not a pointer into the michael@0: // .stabstr section; this is because the name as it appears in the michael@0: // STABS data is followed by type information. The value passed to michael@0: // StartFunction is the function name alone. michael@0: // michael@0: // In languages that use name mangling, like C++, NAME is mangled. michael@0: virtual bool StartFunction(const string &name, uint64_t address) { michael@0: return true; michael@0: } michael@0: michael@0: // Finish processing the function. If ADDRESS is non-zero, it is michael@0: // the ending address for the function. If ADDRESS is zero, then michael@0: // the function's ending address is not available, and the consumer michael@0: // must infer it by other means. michael@0: virtual bool EndFunction(uint64_t address) { return true; } michael@0: michael@0: // Report that the code at ADDRESS is attributable to line NUMBER of michael@0: // the source file named FILENAME. The caller must infer the ending michael@0: // address of the line. michael@0: virtual bool Line(uint64_t address, const char *filename, int number) { michael@0: return true; michael@0: } michael@0: michael@0: // Report that an exported function NAME is present at ADDRESS. michael@0: // The size of the function is unknown. michael@0: virtual bool Extern(const string &name, uint64_t address) { michael@0: return true; michael@0: } michael@0: michael@0: // Report a warning. 
FORMAT is a printf-like format string, michael@0: // specifying how to format the subsequent arguments. michael@0: virtual void Warning(const char *format, ...) = 0; michael@0: }; michael@0: michael@0: } // namespace google_breakpad michael@0: michael@0: #endif // COMMON_STABS_READER_H__