// -*- mode: c++ -*-

// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: // Original author: Jim Blandy michael@0: michael@0: // The DwarfLineToModule class accepts line number information from a michael@0: // DWARF parser and adds it to a google_breakpad::Module. The Module michael@0: // can write that data out as a Breakpad symbol file. michael@0: michael@0: #ifndef COMMON_LINUX_DWARF_LINE_TO_MODULE_H michael@0: #define COMMON_LINUX_DWARF_LINE_TO_MODULE_H michael@0: michael@0: #include michael@0: michael@0: #include "common/module.h" michael@0: #include "common/dwarf/dwarf2reader.h" michael@0: #include "common/using_std_string.h" michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: // A class for producing a vector of google_breakpad::Module::Line michael@0: // instances from parsed DWARF line number data. michael@0: // michael@0: // An instance of this class can be provided as a handler to a michael@0: // dwarf2reader::LineInfo DWARF line number information parser. The michael@0: // handler accepts source location information from the parser and michael@0: // uses it to produce a vector of google_breakpad::Module::Line michael@0: // objects, referring to google_breakpad::Module::File objects added michael@0: // to a particular google_breakpad::Module. michael@0: // michael@0: // GNU toolchain omitted sections support: michael@0: // ====================================== michael@0: // michael@0: // Given the right options, the GNU toolchain will omit unreferenced michael@0: // functions from the final executable. Unfortunately, when it does so, it michael@0: // does not remove the associated portions of the DWARF line number michael@0: // program; instead, it gives the DW_LNE_set_address instructions referring michael@0: // to the now-deleted code addresses of zero. Given this input, the DWARF michael@0: // line parser will call AddLine with a series of lines starting at address michael@0: // zero. 
For example, here is the output from 'readelf -wl' for a program michael@0: // with four functions, the first three of which have been omitted: michael@0: // michael@0: // Line Number Statements: michael@0: // Extended opcode 2: set Address to 0x0 michael@0: // Advance Line by 14 to 15 michael@0: // Copy michael@0: // Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16 michael@0: // Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18 michael@0: // Advance PC by 2 to 0xd michael@0: // Extended opcode 1: End of Sequence michael@0: // michael@0: // Extended opcode 2: set Address to 0x0 michael@0: // Advance Line by 14 to 15 michael@0: // Copy michael@0: // Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16 michael@0: // Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18 michael@0: // Advance PC by 2 to 0xd michael@0: // Extended opcode 1: End of Sequence michael@0: // michael@0: // Extended opcode 2: set Address to 0x0 michael@0: // Advance Line by 19 to 20 michael@0: // Copy michael@0: // Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21 michael@0: // Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22 michael@0: // Advance PC by 2 to 0xa michael@0: // Extended opcode 1: End of Sequence michael@0: // michael@0: // Extended opcode 2: set Address to 0x80483a4 michael@0: // Advance Line by 23 to 24 michael@0: // Copy michael@0: // Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25 michael@0: // Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26 michael@0: // Advance PC by 6 to 0x80483bd michael@0: // Extended opcode 1: End of Sequence michael@0: // michael@0: // Instead of collecting runs of lines describing code that is not there, michael@0: // we try to recognize and drop them. 
Since the linker doesn't explicitly michael@0: // distinguish references to dropped sections from genuine references to michael@0: // code at address zero, we must use a heuristic. We have chosen: michael@0: // michael@0: // - If a line starts at address zero, omit it. (On the platforms michael@0: // breakpad targets, it is extremely unlikely that there will be code michael@0: // at address zero.) michael@0: // michael@0: // - If a line starts immediately after an omitted line, omit it too. michael@0: class DwarfLineToModule: public dwarf2reader::LineInfoHandler { michael@0: public: michael@0: // As the DWARF line info parser passes us line records, add source michael@0: // files to MODULE, and add all lines to the end of LINES. LINES michael@0: // need not be empty. If the parser hands us a zero-length line, we michael@0: // omit it. If the parser hands us a line that extends beyond the michael@0: // end of the address space, we clip it. It's up to our client to michael@0: // sort out which lines belong to which functions; we don't add them michael@0: // to any particular function in MODULE ourselves. 
michael@0: DwarfLineToModule(Module *module, const string& compilation_dir, michael@0: vector *lines) michael@0: : module_(module), michael@0: compilation_dir_(compilation_dir), michael@0: lines_(lines), michael@0: highest_file_number_(-1), michael@0: omitted_line_end_(0), michael@0: warned_bad_file_number_(false), michael@0: warned_bad_directory_number_(false) { } michael@0: michael@0: ~DwarfLineToModule() { } michael@0: michael@0: void DefineDir(const string &name, uint32 dir_num); michael@0: void DefineFile(const string &name, int32 file_num, michael@0: uint32 dir_num, uint64 mod_time, michael@0: uint64 length); michael@0: void AddLine(uint64 address, uint64 length, michael@0: uint32 file_num, uint32 line_num, uint32 column_num); michael@0: michael@0: private: michael@0: michael@0: typedef std::map DirectoryTable; michael@0: typedef std::map FileTable; michael@0: michael@0: // The module we're contributing debugging info to. Owned by our michael@0: // client. michael@0: Module *module_; michael@0: michael@0: // The compilation directory for the current compilation unit whose michael@0: // lines are being accumulated. michael@0: string compilation_dir_; michael@0: michael@0: // The vector of lines we're accumulating. Owned by our client. michael@0: // michael@0: // In a Module, as in a breakpad symbol file, lines belong to michael@0: // specific functions, but DWARF simply assigns lines to addresses; michael@0: // one must infer the line/function relationship using the michael@0: // functions' beginning and ending addresses. So we can't add these michael@0: // to the appropriate function from module_ until we've read the michael@0: // function info as well. Instead, we accumulate lines here, and let michael@0: // whoever constructed this sort it all out. michael@0: vector *lines_; michael@0: michael@0: // A table mapping directory numbers to paths. 
michael@0: DirectoryTable directories_; michael@0: michael@0: // A table mapping file numbers to Module::File pointers. michael@0: FileTable files_; michael@0: michael@0: // The highest file number we've seen so far, or -1 if we've seen michael@0: // none. Used for dynamically defined file numbers. michael@0: int32 highest_file_number_; michael@0: michael@0: // This is the ending address of the last line we omitted, or zero if we michael@0: // didn't omit the previous line. It is zero before we have received any michael@0: // AddLine calls. michael@0: uint64 omitted_line_end_; michael@0: michael@0: // True if we've warned about: michael@0: bool warned_bad_file_number_; // bad file numbers michael@0: bool warned_bad_directory_number_; // bad directory numbers michael@0: }; michael@0: michael@0: } // namespace google_breakpad michael@0: michael@0: #endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H