// -*- mode: c++ -*-

// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: // Original author: Jim Blandy michael@0: michael@0: // module.h: Define google_breakpad::Module. A Module holds debugging michael@0: // information, and can write that information out as a Breakpad michael@0: // symbol file. michael@0: michael@0: #ifndef COMMON_LINUX_MODULE_H__ michael@0: #define COMMON_LINUX_MODULE_H__ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "common/symbol_data.h" michael@0: #include "common/using_std_string.h" michael@0: #include "common/unique_string.h" michael@0: #include "google_breakpad/common/breakpad_types.h" michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: using std::set; michael@0: using std::vector; michael@0: using std::map; michael@0: michael@0: // A Module represents the contents of a module, and supports methods michael@0: // for adding information produced by parsing STABS or DWARF data michael@0: // --- possibly both from the same file --- and then writing out the michael@0: // unified contents as a Breakpad-format symbol file. michael@0: class Module { michael@0: public: michael@0: // The type of addresses and sizes in a symbol table. michael@0: typedef uint64_t Address; michael@0: struct File; michael@0: struct Function; michael@0: struct Line; michael@0: struct Extern; michael@0: michael@0: // Addresses appearing in File, Function, and Line structures are michael@0: // absolute, not relative to the the module's load address. That michael@0: // is, if the module were loaded at its nominal load address, the michael@0: // addresses would be correct. michael@0: michael@0: // A source file. michael@0: struct File { michael@0: // The name of the source file. michael@0: string name; michael@0: michael@0: // The file's source id. 
The Write member function clears this michael@0: // field and assigns source ids a fresh, so any value placed here michael@0: // before calling Write will be lost. michael@0: int source_id; michael@0: }; michael@0: michael@0: // A function. michael@0: struct Function { michael@0: // For sorting by address. (Not style-guide compliant, but it's michael@0: // stupid not to put this in the struct.) michael@0: static bool CompareByAddress(const Function *x, const Function *y) { michael@0: return x->address < y->address; michael@0: } michael@0: michael@0: // The function's name. michael@0: string name; michael@0: michael@0: // The start address and length of the function's code. michael@0: Address address, size; michael@0: michael@0: // The function's parameter size. michael@0: Address parameter_size; michael@0: michael@0: // Source lines belonging to this function, sorted by increasing michael@0: // address. michael@0: vector lines; michael@0: }; michael@0: michael@0: // A source line. michael@0: struct Line { michael@0: // For sorting by address. (Not style-guide compliant, but it's michael@0: // stupid not to put this in the struct.) michael@0: static bool CompareByAddress(const Module::Line &x, const Module::Line &y) { michael@0: return x.address < y.address; michael@0: } michael@0: michael@0: Address address, size; // The address and size of the line's code. michael@0: File *file; // The source file. michael@0: int number; // The source line number. michael@0: }; michael@0: michael@0: // An exported symbol. michael@0: struct Extern { michael@0: Address address; michael@0: string name; michael@0: }; michael@0: michael@0: // Representation of an expression. This can either be a postfix michael@0: // expression, in which case it is stored as a string, or a simple michael@0: // expression of the form (identifier + imm) or *(identifier + imm). michael@0: // It can also be invalid (denoting "no value"). 
michael@0: enum ExprHow { michael@0: kExprInvalid = 1, michael@0: kExprPostfix, michael@0: kExprSimple, michael@0: kExprSimpleMem michael@0: }; michael@0: struct Expr { michael@0: // Construct a simple-form expression michael@0: Expr(const UniqueString* ident, long offset, bool deref) { michael@0: if (ident == ustr__empty()) { michael@0: Expr(); michael@0: } else { michael@0: postfix_ = ""; michael@0: ident_ = ident; michael@0: offset_ = offset; michael@0: how_ = deref ? kExprSimpleMem : kExprSimple; michael@0: } michael@0: } michael@0: // Construct an expression from a postfix string michael@0: Expr(string postfix) { michael@0: if (postfix.empty()) { michael@0: Expr(); michael@0: } else { michael@0: postfix_ = postfix; michael@0: ident_ = NULL; michael@0: offset_ = 0; michael@0: how_ = kExprPostfix; michael@0: } michael@0: } michael@0: // Construct an invalid expression michael@0: Expr() { michael@0: postfix_ = ""; michael@0: ident_ = NULL; michael@0: offset_ = 0; michael@0: how_ = kExprInvalid; michael@0: } michael@0: bool isExprInvalid() const { return how_ == kExprInvalid; } michael@0: michael@0: // Return the postfix expression string, either directly, michael@0: // if this is a postfix expression, or by synthesising it michael@0: // for a simple expression. michael@0: string getExprPostfix() const { michael@0: switch (how_) { michael@0: case kExprPostfix: michael@0: return postfix_; michael@0: case kExprSimple: michael@0: case kExprSimpleMem: { michael@0: char buf[40]; michael@0: sprintf(buf, " %ld %c%s", labs(offset_), offset_ < 0 ? '-' : '+', michael@0: how_ == kExprSimple ? 
"" : " ^"); michael@0: return string(FromUniqueString(ident_)) + string(buf); michael@0: } michael@0: case kExprInvalid: michael@0: default: michael@0: assert(0 && "getExprPostfix: invalid Module::Expr type"); michael@0: return "Expr::genExprPostfix: kExprInvalid"; michael@0: } michael@0: } michael@0: michael@0: bool operator==(const Expr& other) const { michael@0: return how_ == other.how_ && michael@0: ident_ == other.ident_ && michael@0: offset_ == other.offset_ && michael@0: postfix_ == other.postfix_; michael@0: } michael@0: michael@0: // Returns an Expr which evaluates to |this| + |delta| michael@0: Expr add_delta(long delta) { michael@0: if (delta == 0) { michael@0: return *this; michael@0: } michael@0: // If it's a simple form expression of the form "identifier + offset", michael@0: // simply add |delta| on to |offset|. In the other two possible michael@0: // cases: michael@0: // *(identifier + offset) michael@0: // completely arbitrary postfix expression string michael@0: // the only option is to "downgrade" it to a postfix expression and add michael@0: // "+/- delta" at the end of the string, since the result can't be michael@0: // represented in the simple form. michael@0: switch (how_) { michael@0: case kExprSimpleMem: michael@0: case kExprPostfix: { michael@0: char buf[40]; michael@0: sprintf(buf, " %ld %c", labs(delta), delta < 0 ? '-' : '+'); michael@0: return Expr(getExprPostfix() + string(buf)); michael@0: } michael@0: case kExprSimple: michael@0: return Expr(ident_, offset_ + delta, false); michael@0: case kExprInvalid: michael@0: default: michael@0: assert(0 && "add_delta: invalid Module::Expr type"); michael@0: // Invalid inputs produce an invalid result michael@0: return Expr(); michael@0: } michael@0: } michael@0: michael@0: // Returns an Expr which evaluates to *|this| michael@0: Expr deref() { michael@0: // In the simplest case, a kExprSimple can be changed into a michael@0: // kExprSimpleMem. 
In all other cases it has to be dumped as a michael@0: // postfix string, and " ^" added at the end. michael@0: switch (how_) { michael@0: case kExprSimple: { michael@0: Expr t = *this; michael@0: t.how_ = kExprSimpleMem; michael@0: return t; michael@0: } michael@0: case kExprSimpleMem: michael@0: case kExprPostfix: { michael@0: return Expr(getExprPostfix() + " ^"); michael@0: } michael@0: case kExprInvalid: michael@0: default: michael@0: assert(0 && "deref: invalid Module::Expr type"); michael@0: // Invalid inputs produce an invalid result michael@0: return Expr(); michael@0: } michael@0: } michael@0: michael@0: // The identifier that gives the starting value for simple expressions. michael@0: const UniqueString* ident_; michael@0: // The offset to add for simple expressions. michael@0: long offset_; michael@0: // The Postfix expression string to evaluate for non-simple expressions. michael@0: string postfix_; michael@0: // The operation expressed by this expression. michael@0: ExprHow how_; michael@0: michael@0: friend std::ostream& operator<<(std::ostream& stream, const Expr& expr); michael@0: }; michael@0: michael@0: // A map from register names to expressions that recover michael@0: // their values. This can represent a complete set of rules to michael@0: // follow at some address, or a set of changes to be applied to an michael@0: // extant set of rules. michael@0: typedef map RuleMap; michael@0: michael@0: // A map from addresses to RuleMaps, representing changes that take michael@0: // effect at given addresses. michael@0: typedef map RuleChangeMap; michael@0: michael@0: // A range of 'STACK CFI' stack walking information. An instance of michael@0: // this structure corresponds to a 'STACK CFI INIT' record and the michael@0: // subsequent 'STACK CFI' records that fall within its range. michael@0: struct StackFrameEntry { michael@0: // The starting address and number of bytes of machine code this michael@0: // entry covers. 
michael@0: Address address, size; michael@0: michael@0: // The initial register recovery rules, in force at the starting michael@0: // address. michael@0: RuleMap initial_rules; michael@0: michael@0: // A map from addresses to rule changes. To find the rules in michael@0: // force at a given address, start with initial_rules, and then michael@0: // apply the changes given in this map for all addresses up to and michael@0: // including the address you're interested in. michael@0: RuleChangeMap rule_changes; michael@0: }; michael@0: michael@0: struct FunctionCompare { michael@0: bool operator() (const Function *lhs, michael@0: const Function *rhs) const { michael@0: if (lhs->address == rhs->address) michael@0: return lhs->name < rhs->name; michael@0: return lhs->address < rhs->address; michael@0: } michael@0: }; michael@0: michael@0: struct ExternCompare { michael@0: bool operator() (const Extern *lhs, michael@0: const Extern *rhs) const { michael@0: return lhs->address < rhs->address; michael@0: } michael@0: }; michael@0: michael@0: struct StackFrameEntryCompare { michael@0: bool operator() (const StackFrameEntry* lhs, michael@0: const StackFrameEntry* rhs) const { michael@0: return lhs->address < rhs->address; michael@0: } michael@0: }; michael@0: michael@0: // Create a new module with the given name, operating system, michael@0: // architecture, and ID string. michael@0: Module(const string &name, const string &os, const string &architecture, michael@0: const string &id); michael@0: ~Module(); michael@0: michael@0: // Set the module's load address to LOAD_ADDRESS; addresses given michael@0: // for functions and lines will be written to the Breakpad symbol michael@0: // file as offsets from this address. Construction initializes this michael@0: // module's load address to zero: addresses written to the symbol michael@0: // file will be the same as they appear in the Function, Line, and michael@0: // StackFrameEntry structures. 
michael@0: // michael@0: // Note that this member function has no effect on addresses stored michael@0: // in the data added to this module; the Write member function michael@0: // simply subtracts off the load address from addresses before it michael@0: // prints them. Only the last load address given before calling michael@0: // Write is used. michael@0: void SetLoadAddress(Address load_address); michael@0: michael@0: // Add FUNCTION to the module. FUNCTION's name must not be empty. michael@0: // This module owns all Function objects added with this function: michael@0: // destroying the module destroys them as well. michael@0: void AddFunction(Function *function); michael@0: michael@0: // Add all the functions in [BEGIN,END) to the module. michael@0: // This module owns all Function objects added with this function: michael@0: // destroying the module destroys them as well. michael@0: void AddFunctions(vector::iterator begin, michael@0: vector::iterator end); michael@0: michael@0: // Add STACK_FRAME_ENTRY to the module. michael@0: // This module owns all StackFrameEntry objects added with this michael@0: // function: destroying the module destroys them as well. michael@0: void AddStackFrameEntry(StackFrameEntry *stack_frame_entry); michael@0: michael@0: // Add PUBLIC to the module. michael@0: // This module owns all Extern objects added with this function: michael@0: // destroying the module destroys them as well. michael@0: void AddExtern(Extern *ext); michael@0: michael@0: // If this module has a file named NAME, return a pointer to it. If michael@0: // it has none, then create one and return a pointer to the new michael@0: // file. This module owns all File objects created using these michael@0: // functions; destroying the module destroys them as well. michael@0: File *FindFile(const string &name); michael@0: File *FindFile(const char *name); michael@0: michael@0: // If this module has a file named NAME, return a pointer to it. 
michael@0: // Otherwise, return NULL. michael@0: File *FindExistingFile(const string &name); michael@0: michael@0: // Insert pointers to the functions added to this module at I in michael@0: // VEC. The pointed-to Functions are still owned by this module. michael@0: // (Since this is effectively a copy of the function list, this is michael@0: // mostly useful for testing; other uses should probably get a more michael@0: // appropriate interface.) michael@0: void GetFunctions(vector *vec, vector::iterator i); michael@0: michael@0: // If this module has a function at ADDRESS, return a pointer to it. michael@0: // Otherwise, return NULL. michael@0: Function* FindFunctionByAddress(Address address); michael@0: michael@0: // Insert pointers to the externs added to this module at I in michael@0: // VEC. The pointed-to Externs are still owned by this module. michael@0: // (Since this is effectively a copy of the extern list, this is michael@0: // mostly useful for testing; other uses should probably get a more michael@0: // appropriate interface.) michael@0: void GetExterns(vector *vec, vector::iterator i); michael@0: michael@0: // If this module has an extern whose base address is less than ADDRESS, michael@0: // return a pointer to it. Otherwise, return NULL. michael@0: Extern* FindExternByAddress(Address address); michael@0: michael@0: // Clear VEC and fill it with pointers to the Files added to this michael@0: // module, sorted by name. The pointed-to Files are still owned by michael@0: // this module. (Since this is effectively a copy of the file list, michael@0: // this is mostly useful for testing; other uses should probably get michael@0: // a more appropriate interface.) michael@0: void GetFiles(vector *vec); michael@0: michael@0: // Clear VEC and fill it with pointers to the StackFrameEntry michael@0: // objects that have been added to this module. 
(Since this is michael@0: // effectively a copy of the stack frame entry list, this is mostly michael@0: // useful for testing; other uses should probably get michael@0: // a more appropriate interface.) michael@0: void GetStackFrameEntries(vector *vec); michael@0: michael@0: // If this module has a StackFrameEntry whose address range covers michael@0: // ADDRESS, return it. Otherwise return NULL. michael@0: StackFrameEntry* FindStackFrameEntryByAddress(Address address); michael@0: michael@0: // Find those files in this module that are actually referred to by michael@0: // functions' line number data, and assign them source id numbers. michael@0: // Set the source id numbers for all other files --- unused by the michael@0: // source line data --- to -1. We do this before writing out the michael@0: // symbol file, at which point we omit any unused files. michael@0: void AssignSourceIds(); michael@0: michael@0: // Call AssignSourceIds, and write this module to STREAM in the michael@0: // breakpad symbol format. Return true if all goes well, or false if michael@0: // an error occurs. This method writes out: michael@0: // - a header based on the values given to the constructor, michael@0: // If symbol_data is not ONLY_CFI then: michael@0: // - the source files added via FindFile, michael@0: // - the functions added via AddFunctions, each with its lines, michael@0: // - all public records, michael@0: // If symbol_data is not NO_CFI then: michael@0: // - all CFI records. michael@0: // Addresses in the output are all relative to the load address michael@0: // established by SetLoadAddress. michael@0: bool Write(std::ostream &stream, SymbolData symbol_data); michael@0: michael@0: private: michael@0: // Report an error that has occurred writing the symbol file, using michael@0: // errno to find the appropriate cause. Return false. 
michael@0: static bool ReportError(); michael@0: michael@0: // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI' michael@0: // records, without a final newline. Return true if all goes well; michael@0: // if an error occurs, return false, and leave errno set. michael@0: static bool WriteRuleMap(const RuleMap &rule_map, std::ostream &stream); michael@0: michael@0: // Module header entries. michael@0: string name_, os_, architecture_, id_; michael@0: michael@0: // The module's nominal load address. Addresses for functions and michael@0: // lines are absolute, assuming the module is loaded at this michael@0: // address. michael@0: Address load_address_; michael@0: michael@0: // Relation for maps whose keys are strings shared with some other michael@0: // structure. michael@0: struct CompareStringPtrs { michael@0: bool operator()(const string *x, const string *y) const { return *x < *y; } michael@0: }; michael@0: michael@0: // A map from filenames to File structures. The map's keys are michael@0: // pointers to the Files' names. michael@0: typedef map FileByNameMap; michael@0: michael@0: // A set containing Function structures, sorted by address. michael@0: typedef set FunctionSet; michael@0: michael@0: // A set containing Extern structures, sorted by address. michael@0: typedef set ExternSet; michael@0: michael@0: // A set containing StackFrameEntry structures, sorted by address. michael@0: typedef set StackFrameEntrySet; michael@0: michael@0: // The module owns all the files and functions that have been added michael@0: // to it; destroying the module frees the Files and Functions these michael@0: // point to. michael@0: FileByNameMap files_; // This module's source files. michael@0: FunctionSet functions_; // This module's functions. michael@0: michael@0: // The module owns all the call frame info entries that have been michael@0: // added to it. 
michael@0: StackFrameEntrySet stack_frame_entries_; michael@0: michael@0: // The module owns all the externs that have been added to it; michael@0: // destroying the module frees the Externs these point to. michael@0: ExternSet externs_; michael@0: }; michael@0: michael@0: } // namespace google_breakpad michael@0: michael@0: #endif // COMMON_LINUX_MODULE_H__