Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations about how to convince FindCookie users to test the
condition and pass a nullptr when disabling double key logic.
michael@0 | 1 | // -*- mode: C++ -*- |
michael@0 | 2 | |
michael@0 | 3 | // Copyright (c) 2010 Google Inc. All Rights Reserved. |
michael@0 | 4 | // |
michael@0 | 5 | // Redistribution and use in source and binary forms, with or without |
michael@0 | 6 | // modification, are permitted provided that the following conditions are |
michael@0 | 7 | // met: |
michael@0 | 8 | // |
michael@0 | 9 | // * Redistributions of source code must retain the above copyright |
michael@0 | 10 | // notice, this list of conditions and the following disclaimer. |
michael@0 | 11 | // * Redistributions in binary form must reproduce the above |
michael@0 | 12 | // copyright notice, this list of conditions and the following disclaimer |
michael@0 | 13 | // in the documentation and/or other materials provided with the |
michael@0 | 14 | // distribution. |
michael@0 | 15 | // * Neither the name of Google Inc. nor the names of its |
michael@0 | 16 | // contributors may be used to endorse or promote products derived from |
michael@0 | 17 | // this software without specific prior written permission. |
michael@0 | 18 | // |
michael@0 | 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
michael@0 | 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
michael@0 | 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
michael@0 | 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
michael@0 | 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
michael@0 | 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
michael@0 | 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
michael@0 | 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
michael@0 | 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 30 | |
michael@0 | 31 | // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
michael@0 | 32 | |
michael@0 | 33 | // This file contains definitions related to the DWARF2/3 reader and |
michael@0 | 34 | // its handler interfaces. |
michael@0 | 35 | // The DWARF2/3 specification can be found at |
michael@0 | 36 | // http://dwarf.freestandards.org and should be considered required |
michael@0 | 37 | // reading if you wish to modify the implementation. |
michael@0 | 38 | // Only a cursory attempt is made to explain terminology that is |
michael@0 | 39 | // used here, as it is much better explained in the standard documents. |
michael@0 | 40 | #ifndef COMMON_DWARF_DWARF2READER_H__ |
michael@0 | 41 | #define COMMON_DWARF_DWARF2READER_H__ |
michael@0 | 42 | |
michael@0 | 43 | #include <list> |
michael@0 | 44 | #include <map> |
michael@0 | 45 | #include <string> |
michael@0 | 46 | #include <utility> |
michael@0 | 47 | #include <vector> |
michael@0 | 48 | |
michael@0 | 49 | #include "common/dwarf/bytereader.h" |
michael@0 | 50 | #include "common/dwarf/dwarf2enums.h" |
michael@0 | 51 | #include "common/dwarf/types.h" |
michael@0 | 52 | #include "common/using_std_string.h" |
michael@0 | 53 | |
michael@0 | 54 | namespace dwarf2reader { |
michael@0 | 55 | struct LineStateMachine; |
michael@0 | 56 | class Dwarf2Handler; |
michael@0 | 57 | class LineInfoHandler; |
michael@0 | 58 | |
michael@0 | 59 | // This maps from a string naming a section to a pair containing |
michael@0 | 60 | // a pointer to the data for the section, and the size of the section. |
michael@0 | 61 | typedef std::map<string, std::pair<const char*, uint64> > SectionMap; |
michael@0 | 62 | typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> > |
michael@0 | 63 | AttributeList; |
michael@0 | 64 | typedef AttributeList::iterator AttributeIterator; |
michael@0 | 65 | typedef AttributeList::const_iterator ConstAttributeIterator; |
michael@0 | 66 | |
michael@0 | 67 | struct LineInfoHeader { |
michael@0 | 68 | uint64 total_length; |
michael@0 | 69 | uint16 version; |
michael@0 | 70 | uint64 prologue_length; |
michael@0 | 71 | uint8 min_insn_length; // insn stands for instruction |
michael@0 | 72 | bool default_is_stmt; // stmt stands for statement |
michael@0 | 73 | int8 line_base; |
michael@0 | 74 | uint8 line_range; |
michael@0 | 75 | uint8 opcode_base; |
michael@0 | 76 | // Use a pointer so that signalsafe_addr2line is able to use this structure |
michael@0 | 77 | // without heap allocation problems. A LineInfo deletes the vector held by its own header_ in its destructor. |
michael@0 | 78 | std::vector<unsigned char> *std_opcode_lengths; |
michael@0 | 79 | }; |
michael@0 | 80 | |
michael@0 | 81 | class LineInfo { |
michael@0 | 82 | public: |
michael@0 | 83 | |
michael@0 | 84 | // Initializes a .debug_line reader. BUFFER_ and BUFFER_LENGTH give the |
michael@0 | 85 | // beginning and length of the line information to read. |
michael@0 | 86 | // READER is a ByteReader class that has the endianness set |
michael@0 | 87 | // properly. HANDLER is the LineInfoHandler whose callbacks get called. |
michael@0 | 88 | LineInfo(const char* buffer_, uint64 buffer_length, |
michael@0 | 89 | ByteReader* reader, LineInfoHandler* handler); |
michael@0 | 90 | |
michael@0 | 91 | virtual ~LineInfo() { |
michael@0 | 92 | if (header_.std_opcode_lengths) { |
michael@0 | 93 | delete header_.std_opcode_lengths; |
michael@0 | 94 | } |
michael@0 | 95 | } |
michael@0 | 96 | |
michael@0 | 97 | // Start processing line info, and calling callbacks in the handler. |
michael@0 | 98 | // Consumes the line number information for a single compilation unit. |
michael@0 | 99 | // Returns the number of bytes processed. |
michael@0 | 100 | uint64 Start(); |
michael@0 | 101 | |
michael@0 | 102 | // Process a single line info opcode at START using the state |
michael@0 | 103 | // machine at LSM. Return true if we should define a line using the |
michael@0 | 104 | // current state of the line state machine. Place the length of the |
michael@0 | 105 | // opcode in LEN. |
michael@0 | 106 | // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm |
michael@0 | 107 | // passes the address of PC. In other words, LSM_PASSES_PC will be |
michael@0 | 108 | // set to true if the following condition is met: |
michael@0 | 109 | // |
michael@0 | 110 | // lsm's old address < PC <= lsm's new address |
michael@0 | 111 | static bool ProcessOneOpcode(ByteReader* reader, |
michael@0 | 112 | LineInfoHandler* handler, |
michael@0 | 113 | const struct LineInfoHeader &header, |
michael@0 | 114 | const char* start, |
michael@0 | 115 | struct LineStateMachine* lsm, |
michael@0 | 116 | size_t* len, |
michael@0 | 117 | uintptr pc, |
michael@0 | 118 | bool *lsm_passes_pc); |
michael@0 | 119 | |
michael@0 | 120 | private: |
michael@0 | 121 | // Reads the DWARF2/3 header for this line info. |
michael@0 | 122 | void ReadHeader(); |
michael@0 | 123 | |
michael@0 | 124 | // Reads the DWARF2/3 line information. |
michael@0 | 125 | void ReadLines(); |
michael@0 | 126 | |
michael@0 | 127 | // The associated handler to call processing functions in. |
michael@0 | 128 | LineInfoHandler* handler_; |
michael@0 | 129 | |
michael@0 | 130 | // The associated ByteReader that handles endianness issues for us. |
michael@0 | 131 | ByteReader* reader_; |
michael@0 | 132 | |
michael@0 | 133 | // A DWARF2/3 line info header. This is not the same size as |
michael@0 | 134 | // in the actual file, as the one in the file may have 32 bit or |
michael@0 | 135 | // 64 bit lengths. |
michael@0 | 136 | |
michael@0 | 137 | struct LineInfoHeader header_; |
michael@0 | 138 | |
michael@0 | 139 | // buffer_ is the buffer for our line info, starting at exactly where |
michael@0 | 140 | // the line info to read is. after_header_ is the place right after |
michael@0 | 141 | // the end of the line information header. |
michael@0 | 142 | const char* buffer_; |
michael@0 | 143 | uint64 buffer_length_; |
michael@0 | 144 | const char* after_header_; |
michael@0 | 145 | }; |
michael@0 | 146 | |
michael@0 | 147 | // This class is the main interface between the line info reader and |
michael@0 | 148 | // the client. The virtual functions inside this get called for |
michael@0 | 149 | // interesting events that happen during line info reading. The |
michael@0 | 150 | // default implementation does nothing. |
michael@0 | 151 | |
michael@0 | 152 | class LineInfoHandler { |
michael@0 | 153 | public: |
michael@0 | 154 | LineInfoHandler() { } |
michael@0 | 155 | |
michael@0 | 156 | virtual ~LineInfoHandler() { } |
michael@0 | 157 | |
michael@0 | 158 | // Called when we define a directory. NAME is the directory name, |
michael@0 | 159 | // DIR_NUM is the directory number. |
michael@0 | 160 | virtual void DefineDir(const string& name, uint32 dir_num) { } |
michael@0 | 161 | |
michael@0 | 162 | // Called when we define a filename. NAME is the filename, FILE_NUM |
michael@0 | 163 | // is the file number which is -1 if the file index is the next |
michael@0 | 164 | // index after the last numbered index (this happens when files are |
michael@0 | 165 | // dynamically defined by the line program), DIR_NUM is the |
michael@0 | 166 | // directory index for the directory name of this file, MOD_TIME is |
michael@0 | 167 | // the modification time of the file, and LENGTH is the length of |
michael@0 | 168 | // the file. |
michael@0 | 169 | virtual void DefineFile(const string& name, int32 file_num, |
michael@0 | 170 | uint32 dir_num, uint64 mod_time, |
michael@0 | 171 | uint64 length) { } |
michael@0 | 172 | |
michael@0 | 173 | // Called when the line info reader has a new line, address pair |
michael@0 | 174 | // ready for us. ADDRESS is the address of the code, LENGTH is the |
michael@0 | 175 | // length of its machine code in bytes, FILE_NUM is the file number |
michael@0 | 176 | // containing the code, LINE_NUM is the line number in that file for |
michael@0 | 177 | // the code, and COLUMN_NUM is the column number the code starts at, |
michael@0 | 178 | // if we know it (0 otherwise). |
michael@0 | 179 | virtual void AddLine(uint64 address, uint64 length, |
michael@0 | 180 | uint32 file_num, uint32 line_num, uint32 column_num) { } |
michael@0 | 181 | }; |
michael@0 | 182 | |
michael@0 | 183 | // The base of DWARF2/3 debug info is a DIE (Debugging Information |
michael@0 | 184 | // Entry). |
michael@0 | 185 | // DWARF groups DIE's into a tree and calls the root of this tree a |
michael@0 | 186 | // "compilation unit". Most of the time, there is one compilation |
michael@0 | 187 | // unit in the .debug_info section for each file that had debug info |
michael@0 | 188 | // generated. |
michael@0 | 189 | // Each DIE consists of: |
michael@0 | 190 | |
michael@0 | 191 | // 1. a tag specifying a thing that is being described (e.g. |
michael@0 | 192 | // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.) |
michael@0 | 193 | // 2. attributes (such as DW_AT_location for location in memory, |
michael@0 | 194 | // DW_AT_name for name), and data for each attribute. |
michael@0 | 195 | // 3. a flag saying whether the DIE has children or not. |
michael@0 | 196 | |
michael@0 | 197 | // In order to gain some amount of compression, the format of |
michael@0 | 198 | // each DIE (tag name, attributes and data forms for the attributes) |
michael@0 | 199 | // is stored in a separate table called the "abbreviation table". |
michael@0 | 200 | // This is done because a large number of DIEs have the exact same tag |
michael@0 | 201 | // and list of attributes, but different data for those attributes. |
michael@0 | 202 | // As a result, the .debug_info section is just a stream of data, and |
michael@0 | 203 | // requires reading of the .debug_abbrev section to say what the data |
michael@0 | 204 | // means. |
michael@0 | 205 | |
michael@0 | 206 | // As a warning to the user, it should be noted that the reason for |
michael@0 | 207 | // using absolute offsets from the beginning of .debug_info is that |
michael@0 | 208 | // DWARF2/3 supports referencing DIE's from other DIE's by their offset |
michael@0 | 209 | // from either the current compilation unit start, *or* the beginning |
michael@0 | 210 | // of the .debug_info section. This means it is possible to reference |
michael@0 | 211 | // a DIE in one compilation unit from a DIE in another compilation |
michael@0 | 212 | // unit. This style of reference is usually used to eliminate |
michael@0 | 213 | // duplicated information that occurs across compilation |
michael@0 | 214 | // units, such as base types, etc. GCC 3.4+ supports this with |
michael@0 | 215 | // -feliminate-dwarf2-dups. Other toolchains will sometimes do |
michael@0 | 216 | // duplicate elimination in the linker. |
michael@0 | 217 | |
michael@0 | 218 | class CompilationUnit { |
michael@0 | 219 | public: |
michael@0 | 220 | |
michael@0 | 221 | // Initialize a compilation unit. This requires a map of sections, |
michael@0 | 222 | // the offset of this compilation unit in the .debug_info section, a |
michael@0 | 223 | // ByteReader, and a Dwarf2Handler class to call callbacks in. |
michael@0 | 224 | CompilationUnit(const SectionMap& sections, uint64 offset, |
michael@0 | 225 | ByteReader* reader, Dwarf2Handler* handler); |
michael@0 | 226 | virtual ~CompilationUnit() { |
michael@0 | 227 | if (abbrevs_) delete abbrevs_; |
michael@0 | 228 | } |
michael@0 | 229 | |
michael@0 | 230 | // Begin reading a Dwarf2 compilation unit, and calling the |
michael@0 | 231 | // callbacks in the Dwarf2Handler. |
michael@0 | 232 | |
michael@0 | 233 | // Return the full length of the compilation unit, including |
michael@0 | 234 | // headers. This plus the starting offset passed to the constructor |
michael@0 | 235 | // is the offset of the end of the compilation unit --- and the |
michael@0 | 236 | // start of the next compilation unit, if there is one. |
michael@0 | 237 | uint64 Start(); |
michael@0 | 238 | |
michael@0 | 239 | private: |
michael@0 | 240 | |
michael@0 | 241 | // This struct represents a single DWARF2/3 abbreviation. |
michael@0 | 242 | // The abbreviation tells how to read a DWARF2/3 DIE, and consists of a |
michael@0 | 243 | // tag and a list of attributes, as well as the data form of each attribute. |
michael@0 | 244 | struct Abbrev { |
michael@0 | 245 | uint64 number; |
michael@0 | 246 | enum DwarfTag tag; |
michael@0 | 247 | bool has_children; |
michael@0 | 248 | AttributeList attributes; |
michael@0 | 249 | }; |
michael@0 | 250 | |
michael@0 | 251 | // A DWARF2/3 compilation unit header. This is not the same size as |
michael@0 | 252 | // in the actual file, as the one in the file may have a 32 bit or |
michael@0 | 253 | // 64 bit length. |
michael@0 | 254 | struct CompilationUnitHeader { |
michael@0 | 255 | uint64 length; |
michael@0 | 256 | uint16 version; |
michael@0 | 257 | uint64 abbrev_offset; |
michael@0 | 258 | uint8 address_size; |
michael@0 | 259 | } header_; |
michael@0 | 260 | |
michael@0 | 261 | // Reads the DWARF2/3 header for this compilation unit. |
michael@0 | 262 | void ReadHeader(); |
michael@0 | 263 | |
michael@0 | 264 | // Reads the DWARF2/3 abbreviations for this compilation unit. |
michael@0 | 265 | void ReadAbbrevs(); |
michael@0 | 266 | |
michael@0 | 267 | // Processes a single DIE for this compilation unit and returns a new |
michael@0 | 268 | // pointer just past the end of it. |
michael@0 | 269 | const char* ProcessDIE(uint64 dieoffset, |
michael@0 | 270 | const char* start, |
michael@0 | 271 | const Abbrev& abbrev); |
michael@0 | 272 | |
michael@0 | 273 | // Processes a single attribute and returns a new pointer just past the |
michael@0 | 274 | // end of it. |
michael@0 | 275 | const char* ProcessAttribute(uint64 dieoffset, |
michael@0 | 276 | const char* start, |
michael@0 | 277 | enum DwarfAttribute attr, |
michael@0 | 278 | enum DwarfForm form); |
michael@0 | 279 | |
michael@0 | 280 | // Processes all DIEs for this compilation unit. |
michael@0 | 281 | void ProcessDIEs(); |
michael@0 | 282 | |
michael@0 | 283 | // Skips the DIE with attributes specified in ABBREV starting at |
michael@0 | 284 | // START, and returns the new place to position the stream to. |
michael@0 | 285 | const char* SkipDIE(const char* start, |
michael@0 | 286 | const Abbrev& abbrev); |
michael@0 | 287 | |
michael@0 | 288 | // Skips the attribute starting at START, with FORM, and returns the |
michael@0 | 289 | // new place to position the stream to. |
michael@0 | 290 | const char* SkipAttribute(const char* start, |
michael@0 | 291 | enum DwarfForm form); |
michael@0 | 292 | |
michael@0 | 293 | // Offset from section start is the offset of this compilation unit |
michael@0 | 294 | // from the beginning of the .debug_info section. |
michael@0 | 295 | uint64 offset_from_section_start_; |
michael@0 | 296 | |
michael@0 | 297 | // buffer_ is the buffer for our CU, starting at .debug_info + offset |
michael@0 | 298 | // passed in from constructor. |
michael@0 | 299 | // after_header_ points to right after the compilation unit header. |
michael@0 | 300 | const char* buffer_; |
michael@0 | 301 | uint64 buffer_length_; |
michael@0 | 302 | const char* after_header_; |
michael@0 | 303 | |
michael@0 | 304 | // The associated ByteReader that handles endianness issues for us. |
michael@0 | 305 | ByteReader* reader_; |
michael@0 | 306 | |
michael@0 | 307 | // The map of sections in our file to buffers containing their data. |
michael@0 | 308 | const SectionMap& sections_; |
michael@0 | 309 | |
michael@0 | 310 | // The associated handler to call processing functions in. |
michael@0 | 311 | Dwarf2Handler* handler_; |
michael@0 | 312 | |
michael@0 | 313 | // Set of DWARF2/3 abbreviations for this compilation unit. Indexed |
michael@0 | 314 | // by abbreviation number, which means that abbrevs_[0] is not |
michael@0 | 315 | // valid. Deleted by the destructor when non-null. |
michael@0 | 316 | std::vector<Abbrev>* abbrevs_; |
michael@0 | 317 | |
michael@0 | 318 | // String section buffer and length, if we have a string section. |
michael@0 | 319 | // This is here to avoid doing a section lookup for strings in |
michael@0 | 320 | // ProcessAttribute, which is in the hot path for DWARF2 reading. |
michael@0 | 321 | const char* string_buffer_; |
michael@0 | 322 | uint64 string_buffer_length_; |
michael@0 | 323 | }; |
michael@0 | 324 | |
michael@0 | 325 | // This class is the main interface between the reader and the |
michael@0 | 326 | // client. The virtual functions inside this get called for |
michael@0 | 327 | // interesting events that happen during DWARF2 reading. |
michael@0 | 328 | // The default implementation skips everything. |
michael@0 | 329 | |
michael@0 | 330 | class Dwarf2Handler { |
michael@0 | 331 | public: |
michael@0 | 332 | Dwarf2Handler() { } |
michael@0 | 333 | |
michael@0 | 334 | virtual ~Dwarf2Handler() { } |
michael@0 | 335 | |
michael@0 | 336 | // Start to process a compilation unit at OFFSET from the beginning of the |
michael@0 | 337 | // .debug_info section. Return false if you would like to skip this |
michael@0 | 338 | // compilation unit. The default implementation returns false. |
michael@0 | 339 | virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, |
michael@0 | 340 | uint8 offset_size, uint64 cu_length, |
michael@0 | 341 | uint8 dwarf_version) { return false; } |
michael@0 | 342 | |
michael@0 | 343 | // Start to process a DIE at OFFSET from the beginning of the .debug_info |
michael@0 | 344 | // section. Return false if you would like to skip this DIE. The default implementation returns false. |
michael@0 | 345 | virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; } |
michael@0 | 346 | |
michael@0 | 347 | // Called when we have an attribute with unsigned data to give to our |
michael@0 | 348 | // handler. The attribute is for the DIE at OFFSET from the beginning of the |
michael@0 | 349 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
michael@0 | 350 | // DATA. |
michael@0 | 351 | virtual void ProcessAttributeUnsigned(uint64 offset, |
michael@0 | 352 | enum DwarfAttribute attr, |
michael@0 | 353 | enum DwarfForm form, |
michael@0 | 354 | uint64 data) { } |
michael@0 | 355 | |
michael@0 | 356 | // Called when we have an attribute with signed data to give to our handler. |
michael@0 | 357 | // The attribute is for the DIE at OFFSET from the beginning of the |
michael@0 | 358 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
michael@0 | 359 | // DATA. |
michael@0 | 360 | virtual void ProcessAttributeSigned(uint64 offset, |
michael@0 | 361 | enum DwarfAttribute attr, |
michael@0 | 362 | enum DwarfForm form, |
michael@0 | 363 | int64 data) { } |
michael@0 | 364 | |
michael@0 | 365 | // Called when we have an attribute whose value is a reference to |
michael@0 | 366 | // another DIE. The attribute belongs to the DIE at OFFSET from the |
michael@0 | 367 | // beginning of the .debug_info section. Its name is ATTR, its form |
michael@0 | 368 | // is FORM, and the offset of the DIE being referred to from the |
michael@0 | 369 | // beginning of the .debug_info section is DATA. |
michael@0 | 370 | virtual void ProcessAttributeReference(uint64 offset, |
michael@0 | 371 | enum DwarfAttribute attr, |
michael@0 | 372 | enum DwarfForm form, |
michael@0 | 373 | uint64 data) { } |
michael@0 | 374 | |
michael@0 | 375 | // Called when we have an attribute with a buffer of data to give to our |
michael@0 | 376 | // handler. The attribute is for the DIE at OFFSET from the beginning of the |
michael@0 | 377 | // .debug_info section. Its name is ATTR, its form is FORM, DATA points to |
michael@0 | 378 | // the buffer's contents, and its length in bytes is LEN. The buffer is |
michael@0 | 379 | // owned by the caller, not the callee, and may not persist for very long. |
michael@0 | 380 | // If you want the data to be available later, it needs to be copied. |
michael@0 | 381 | virtual void ProcessAttributeBuffer(uint64 offset, |
michael@0 | 382 | enum DwarfAttribute attr, |
michael@0 | 383 | enum DwarfForm form, |
michael@0 | 384 | const char* data, |
michael@0 | 385 | uint64 len) { } |
michael@0 | 386 | |
michael@0 | 387 | // Called when we have an attribute with string data to give to our handler. |
michael@0 | 388 | // The attribute is for the DIE at OFFSET from the beginning of the |
michael@0 | 389 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
michael@0 | 390 | // DATA. |
michael@0 | 391 | virtual void ProcessAttributeString(uint64 offset, |
michael@0 | 392 | enum DwarfAttribute attr, |
michael@0 | 393 | enum DwarfForm form, |
michael@0 | 394 | const string& data) { } |
michael@0 | 395 | |
michael@0 | 396 | // Called when we have an attribute whose value is the 64-bit signature |
michael@0 | 397 | // of a type unit in the .debug_types section. OFFSET is the offset of |
michael@0 | 398 | // the DIE whose attribute we're reporting. ATTR and FORM are the |
michael@0 | 399 | // attribute's name and form. SIGNATURE is the type unit's signature. |
michael@0 | 400 | virtual void ProcessAttributeSignature(uint64 offset, |
michael@0 | 401 | enum DwarfAttribute attr, |
michael@0 | 402 | enum DwarfForm form, |
michael@0 | 403 | uint64 signature) { } |
michael@0 | 404 | |
michael@0 | 405 | // Called when finished processing the DIE at OFFSET. |
michael@0 | 406 | // Because DWARF2/3 specifies a tree of DIEs, you may get starts |
michael@0 | 407 | // before ends of the previous DIE, as we process children before |
michael@0 | 408 | // ending the parent. |
michael@0 | 409 | virtual void EndDIE(uint64 offset) { } |
michael@0 | 410 | |
michael@0 | 411 | }; |
michael@0 | 412 | |
michael@0 | 413 | // This class is a reader for DWARF's Call Frame Information. CFI |
michael@0 | 414 | // describes how to unwind stack frames --- even for functions that do |
michael@0 | 415 | // not follow fixed conventions for saving registers, whose frame size |
michael@0 | 416 | // varies as they execute, etc. |
michael@0 | 417 | // |
michael@0 | 418 | // CFI describes, at each machine instruction, how to compute the |
michael@0 | 419 | // stack frame's base address, how to find the return address, and |
michael@0 | 420 | // where to find the saved values of the caller's registers (if the |
michael@0 | 421 | // callee has stashed them somewhere to free up the registers for its |
michael@0 | 422 | // own use). |
michael@0 | 423 | // |
michael@0 | 424 | // For example, suppose we have a function whose machine code looks |
michael@0 | 425 | // like this (imagine an assembly language that looks like C, for a |
michael@0 | 426 | // machine with 32-bit registers, and a stack that grows towards lower |
michael@0 | 427 | // addresses): |
michael@0 | 428 | // |
michael@0 | 429 | //   func:                                ; entry point; return address at sp |
michael@0 | 430 | //   func+0:      sp = sp - 16            ; allocate space for stack frame |
michael@0 | 431 | //   func+1:      sp[12] = r0             ; save r0 at sp+12 |
michael@0 | 432 | //   ...                                  ; other code, not frame-related |
michael@0 | 433 | //   func+10:     sp -= 4; *sp = x        ; push some x on the stack |
michael@0 | 434 | //   ...                                  ; other code, not frame-related |
michael@0 | 435 | //   func+20:     r0 = sp[16]             ; restore saved r0 |
michael@0 | 436 | //   func+21:     sp += 20                ; pop whole stack frame |
michael@0 | 437 | //   func+22:     pc = *sp; sp += 4       ; pop return address and jump to it |
michael@0 | 438 | // |
michael@0 | 439 | // DWARF CFI is (a very compressed representation of) a table with a |
michael@0 | 440 | // row for each machine instruction address and a column for each |
michael@0 | 441 | // register showing how to restore it, if possible. |
michael@0 | 442 | // |
michael@0 | 443 | // A special column named "CFA", for "Canonical Frame Address", tells how |
michael@0 | 444 | // to compute the base address of the frame; registers' entries may |
michael@0 | 445 | // refer to the CFA in describing where the registers are saved. |
michael@0 | 446 | // |
michael@0 | 447 | // Another special column, named "RA", represents the return address. |
michael@0 | 448 | // |
michael@0 | 449 | // For example, here is a complete (uncompressed) table describing the |
michael@0 | 450 | // function above: |
michael@0 | 451 | // |
michael@0 | 452 | //     insn      cfa   r0      r1 ...  ra |
michael@0 | 453 | //     ======================================= |
michael@0 | 454 | //     func+0:   sp                    cfa[0] |
michael@0 | 455 | //     func+1:   sp+16                 cfa[0] |
michael@0 | 456 | //     func+2:   sp+16 cfa[-4]         cfa[0] |
michael@0 | 457 | //     func+11:  sp+20 cfa[-4]         cfa[0] |
michael@0 | 458 | //     func+21:  sp+20                 cfa[0] |
michael@0 | 459 | //     func+22:  sp                    cfa[0] |
michael@0 | 460 | // |
michael@0 | 461 | // Some things to note here: |
michael@0 | 462 | // |
michael@0 | 463 | // - Each row describes the state of affairs *before* executing the |
michael@0 | 464 | // instruction at the given address. Thus, the row for func+0 |
michael@0 | 465 | // describes the state before we allocate the stack frame. In the |
michael@0 | 466 | // next row, the formula for computing the CFA has changed, |
michael@0 | 467 | // reflecting that allocation. |
michael@0 | 468 | // |
michael@0 | 469 | // - The other entries are written in terms of the CFA; this allows |
michael@0 | 470 | // them to remain unchanged as the stack pointer gets bumped around. |
michael@0 | 471 | // For example, the rule for recovering the return address (the "ra" |
michael@0 | 472 | // column) remains unchanged throughout the function, even as the |
michael@0 | 473 | // stack pointer takes on three different offsets from the return |
michael@0 | 474 | // address. |
michael@0 | 475 | // |
michael@0 | 476 | // - Although we haven't shown it, most calling conventions designate |
michael@0 | 477 | // "callee-saves" and "caller-saves" registers. The callee must |
michael@0 | 478 | // preserve the values of callee-saves registers; if it uses them, |
michael@0 | 479 | // it must save their original values somewhere, and restore them |
michael@0 | 480 | // before it returns. In contrast, the callee is free to trash |
michael@0 | 481 | // caller-saves registers; if the callee uses these, it will |
michael@0 | 482 | // probably not bother to save them anywhere, and the CFI will |
michael@0 | 483 | // probably mark their values as "unrecoverable". |
michael@0 | 484 | // |
michael@0 | 485 | // (However, since the caller cannot assume the callee was going to |
michael@0 | 486 | // save them, caller-saves registers are probably dead in the caller |
michael@0 | 487 | // anyway, so compilers usually don't generate CFI for caller-saves |
michael@0 | 488 | // registers.) |
michael@0 | 489 | // |
michael@0 | 490 | // - Exactly where the CFA points is a matter of convention that |
michael@0 | 491 | // depends on the architecture and ABI in use. In the example, the |
michael@0 | 492 | // CFA is the value the stack pointer had upon entry to the |
michael@0 | 493 | // function, pointing at the saved return address. But on the x86, |
michael@0 | 494 | // the call frame information generated by GCC follows the |
michael@0 | 495 | // convention that the CFA is the address *after* the saved return |
michael@0 | 496 | // address. |
michael@0 | 497 | // |
michael@0 | 498 | // But by definition, the CFA remains constant throughout the |
michael@0 | 499 | // lifetime of the frame. This makes it a useful value for other |
michael@0 | 500 | // columns to refer to. It also gives debuggers a useful handle |
michael@0 | 501 | // for identifying a frame. |
michael@0 | 502 | // |
michael@0 | 503 | // If you look at the table above, you'll notice that a given entry is |
michael@0 | 504 | // often the same as the one immediately above it: most instructions |
michael@0 | 505 | // change only one or two aspects of the stack frame, if they affect |
michael@0 | 506 | // it at all. The DWARF format takes advantage of this fact, and |
michael@0 | 507 | // reduces the size of the data by mentioning only the addresses and |
michael@0 | 508 | // columns at which changes take place. So for the above, DWARF CFI |
michael@0 | 509 | // data would only actually mention the following: |
michael@0 | 510 | // |
michael@0 | 511 | //     insn      cfa   r0      r1 ...  ra |
michael@0 | 512 | //     ======================================= |
michael@0 | 513 | //     func+0:   sp                    cfa[0] |
michael@0 | 514 | //     func+1:   sp+16 |
michael@0 | 515 | //     func+2:         cfa[-4] |
michael@0 | 516 | //     func+11:  sp+20 |
michael@0 | 517 | //     func+21:        r0 |
michael@0 | 518 | //     func+22:  sp |
michael@0 | 519 | // |
michael@0 | 520 | // In fact, this is the way the parser reports CFI to the consumer: as |
michael@0 | 521 | // a series of statements of the form, "At address X, column Y changed |
michael@0 | 522 | // to Z," and related conventions for describing the initial state. |
michael@0 | 523 | // |
michael@0 | 524 | // Naturally, it would be impractical to have to scan the entire |
michael@0 | 525 | // program's CFI, noting changes as we go, just to recover the |
michael@0 | 526 | // unwinding rules in effect at one particular instruction. To avoid |
michael@0 | 527 | // this, CFI data is grouped into "entries", each of which covers a |
michael@0 | 528 | // specified range of addresses and begins with a complete statement |
michael@0 | 529 | // of the rules for all recoverable registers at that starting |
michael@0 | 530 | // address. Each entry typically covers a single function. |
michael@0 | 531 | // |
michael@0 | 532 | // Thus, to compute the contents of a given row of the table --- that |
michael@0 | 533 | // is, rules for recovering the CFA, RA, and registers at a given |
michael@0 | 534 | // instruction --- the consumer should find the entry that covers that |
michael@0 | 535 | // instruction's address, start with the initial state supplied at the |
michael@0 | 536 | // beginning of the entry, and work forward until it has processed all |
michael@0 | 537 | // the changes up to and including those for the present instruction. |
michael@0 | 538 | // |
michael@0 | 539 | // There are seven kinds of rules that can appear in an entry of the |
michael@0 | 540 | // table: |
michael@0 | 541 | // |
michael@0 | 542 | // - "undefined": The given register is not preserved by the callee; |
michael@0 | 543 | // its value cannot be recovered. |
michael@0 | 544 | // |
michael@0 | 545 | // - "same value": This register has the same value it did in the callee. |
michael@0 | 546 | // |
michael@0 | 547 | // - offset(N): The register is saved at offset N from the CFA. |
michael@0 | 548 | // |
michael@0 | 549 | // - val_offset(N): The value the register had in the caller is the |
michael@0 | 550 | // CFA plus offset N. (This is usually only useful for describing |
michael@0 | 551 | // the stack pointer.) |
michael@0 | 552 | // |
michael@0 | 553 | // - register(R): The register's value was saved in another register R. |
michael@0 | 554 | // |
michael@0 | 555 | // - expression(E): Evaluating the DWARF expression E using the |
michael@0 | 556 | // current frame's registers' values yields the address at which the |
michael@0 | 557 | // register was saved. |
michael@0 | 558 | // |
michael@0 | 559 | // - val_expression(E): Evaluating the DWARF expression E using the |
michael@0 | 560 | // current frame's registers' values yields the value the register |
michael@0 | 561 | // had in the caller. |
michael@0 | 562 | |
michael@0 | 563 | class CallFrameInfo { |
michael@0 | 564 | public: |
michael@0 | 565 | // The different kinds of entries one finds in CFI. Used internally, |
michael@0 | 566 | // and for error reporting. |
michael@0 | 567 | enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; |
michael@0 | 568 | |
michael@0 | 569 | // The handler class to which the parser hands the parsed call frame |
michael@0 | 570 | // information. Defined below. |
michael@0 | 571 | class Handler; |
michael@0 | 572 | |
michael@0 | 573 | // A reporter class, which CallFrameInfo uses to report errors |
michael@0 | 574 | // encountered while parsing call frame information. Defined below. |
michael@0 | 575 | class Reporter; |
michael@0 | 576 | |
michael@0 | 577 | // Create a DWARF CFI parser. BUFFER points to the contents of the |
michael@0 | 578 | // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. |
michael@0 | 579 | // REPORTER is an error reporter the parser should use to report |
michael@0 | 580 | // problems. READER is a ByteReader instance that has the endianness and |
michael@0 | 581 | // address size set properly. Report the data we find to HANDLER. |
michael@0 | 582 | // |
michael@0 | 583 | // This class can also parse Linux C++ exception handling data, as found |
michael@0 | 584 | // in '.eh_frame' sections. This data is a variant of DWARF CFI that is |
michael@0 | 585 | // placed in loadable segments so that it is present in the program's |
michael@0 | 586 | // address space, and is interpreted by the C++ runtime to search the |
michael@0 | 587 | // call stack for a handler interested in the exception being thrown, |
michael@0 | 588 | // actually pop the frames, and find cleanup code to run. |
michael@0 | 589 | // |
michael@0 | 590 | // There are two differences between the call frame information described |
michael@0 | 591 | // in the DWARF standard and the exception handling data Linux places in |
michael@0 | 592 | // the .eh_frame section: |
michael@0 | 593 | // |
michael@0 | 594 | // - Exception handling data uses a different format for call frame |
michael@0 | 595 | // information entry headers. The distinguished CIE id, the way FDEs |
michael@0 | 596 | // refer to their CIEs, and the way the end of the series of entries is |
michael@0 | 597 | // determined are all slightly different. |
michael@0 | 598 | // |
michael@0 | 599 | // If the constructor's EH_FRAME argument is true, then the |
michael@0 | 600 | // CallFrameInfo parses the entry headers as Linux C++ exception |
michael@0 | 601 | // handling data. If EH_FRAME is false or omitted, the CallFrameInfo |
michael@0 | 602 | // parses standard DWARF call frame information. |
michael@0 | 603 | // |
michael@0 | 604 | // - Linux C++ exception handling data uses CIE augmentation strings |
michael@0 | 605 | // beginning with 'z' to specify the presence of additional data after |
michael@0 | 606 | // the CIE and FDE headers and special encodings used for addresses in |
michael@0 | 607 | // frame description entries. |
michael@0 | 608 | // |
michael@0 | 609 | // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or |
michael@0 | 610 | // exception handling data if you have supplied READER with the base |
michael@0 | 611 | // addresses needed to interpret the pointer encodings that 'z' |
michael@0 | 612 | // augmentations can specify. See the ByteReader interface for details |
michael@0 | 613 | // about the base addresses. See the CallFrameInfo::Handler interface |
michael@0 | 614 | // for details about the additional information one might find in |
michael@0 | 615 | // 'z'-augmented data. |
michael@0 | 616 | // |
michael@0 | 617 | // Thus: |
michael@0 | 618 | // |
michael@0 | 619 | // - If you are parsing standard DWARF CFI, as found in a .debug_frame |
michael@0 | 620 | // section, you should pass false for the EH_FRAME argument, or omit |
michael@0 | 621 | // it, and you need not worry about providing READER with the |
michael@0 | 622 | // additional base addresses. |
michael@0 | 623 | // |
michael@0 | 624 | // - If you want to parse Linux C++ exception handling data from a |
michael@0 | 625 | // .eh_frame section, you should pass EH_FRAME as true, and call |
michael@0 | 626 | // READER's Set*Base member functions before calling our Start method. |
michael@0 | 627 | // |
michael@0 | 628 | // - If you want to parse DWARF CFI that uses the 'z' augmentations |
michael@0 | 629 | // (although I don't think any toolchain ever emits such data), you |
michael@0 | 630 | // could pass false for EH_FRAME, but call READER's Set*Base members. |
michael@0 | 631 | // |
michael@0 | 632 | // The extensions the Linux C++ ABI makes to DWARF for exception |
michael@0 | 633 | // handling are described here, rather poorly: |
michael@0 | 634 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html |
michael@0 | 635 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html |
michael@0 | 636 | // |
michael@0 | 637 | // The mechanics of C++ exception handling, personality routines, |
michael@0 | 638 | // and language-specific data areas are described here, rather nicely: |
michael@0 | 639 | // http://www.codesourcery.com/public/cxx-abi/abi-eh.html |
michael@0 | 640 | CallFrameInfo(const char *buffer, size_t buffer_length, |
michael@0 | 641 | ByteReader *reader, Handler *handler, Reporter *reporter, |
michael@0 | 642 | bool eh_frame = false) |
michael@0 | 643 | : buffer_(buffer), buffer_length_(buffer_length), |
michael@0 | 644 | reader_(reader), handler_(handler), reporter_(reporter), |
michael@0 | 645 | eh_frame_(eh_frame) { } |
michael@0 | 646 | |
michael@0 | 647 | ~CallFrameInfo() { } |
michael@0 | 648 | |
michael@0 | 649 | // Parse the entries in BUFFER, reporting what we find to HANDLER. |
michael@0 | 650 | // Return true if we reach the end of the section successfully, or |
michael@0 | 651 | // false if we encounter an error. |
michael@0 | 652 | bool Start(); |
michael@0 | 653 | |
michael@0 | 654 | // Return the textual name of KIND. For error reporting. |
michael@0 | 655 | static const char *KindName(EntryKind kind); |
michael@0 | 656 | |
michael@0 | 657 | private: |
michael@0 | 658 | |
michael@0 | 659 | struct CIE; |
michael@0 | 660 | |
michael@0 | 661 | // A CFI entry, either an FDE or a CIE. |
michael@0 | 662 | struct Entry { |
michael@0 | 663 | // The starting offset of the entry in the section, for error |
michael@0 | 664 | // reporting. |
michael@0 | 665 | size_t offset; |
michael@0 | 666 | |
michael@0 | 667 | // The start of this entry in the buffer. |
michael@0 | 668 | const char *start; |
michael@0 | 669 | |
michael@0 | 670 | // Which kind of entry this is. |
michael@0 | 671 | // |
michael@0 | 672 | // We want to be able to use this for error reporting even while we're |
michael@0 | 673 | // in the midst of parsing. Error reporting code may assume that kind, |
michael@0 | 674 | // offset, and start fields are valid, although kind may be kUnknown. |
michael@0 | 675 | EntryKind kind; |
michael@0 | 676 | |
michael@0 | 677 | // The end of this entry's common prologue (initial length and id), and |
michael@0 | 678 | // the start of this entry's kind-specific fields. |
michael@0 | 679 | const char *fields; |
michael@0 | 680 | |
michael@0 | 681 | // The start of this entry's instructions. |
michael@0 | 682 | const char *instructions; |
michael@0 | 683 | |
michael@0 | 684 | // The address past the entry's last byte in the buffer. (Note that |
michael@0 | 685 | // since offset points to the entry's initial length field, and the |
michael@0 | 686 | // length field is the number of bytes after that field, this is not |
michael@0 | 687 | // simply buffer_ + offset + length.) |
michael@0 | 688 | const char *end; |
michael@0 | 689 | |
michael@0 | 690 | // For both DWARF CFI and .eh_frame sections, this is the CIE id in a |
michael@0 | 691 | // CIE, and the offset of the associated CIE in an FDE. |
michael@0 | 692 | uint64 id; |
michael@0 | 693 | |
michael@0 | 694 | // The CIE that applies to this entry, if we've parsed it. If this is a |
michael@0 | 695 | // CIE, then this field points to this structure. |
michael@0 | 696 | CIE *cie; |
michael@0 | 697 | }; |
michael@0 | 698 | |
michael@0 | 699 | // A common information entry (CIE). |
michael@0 | 700 | struct CIE: public Entry { |
michael@0 | 701 | uint8 version; // CFI data version number |
michael@0 | 702 | string augmentation; // vendor format extension markers |
michael@0 | 703 | uint64 code_alignment_factor; // scale for code address adjustments |
michael@0 | 704 | int data_alignment_factor; // scale for stack pointer adjustments |
michael@0 | 705 | unsigned return_address_register; // which register holds the return addr |
michael@0 | 706 | |
michael@0 | 707 | // True if this CIE includes Linux C++ ABI 'z' augmentation data. |
michael@0 | 708 | bool has_z_augmentation; |
michael@0 | 709 | |
michael@0 | 710 | // Parsed 'z' augmentation data. These are meaningful only if |
michael@0 | 711 | // has_z_augmentation is true. |
michael@0 | 712 | bool has_z_lsda; // The 'z' augmentation included 'L'. |
michael@0 | 713 | bool has_z_personality; // The 'z' augmentation included 'P'. |
michael@0 | 714 | bool has_z_signal_frame; // The 'z' augmentation included 'S'. |
michael@0 | 715 | |
michael@0 | 716 | // If has_z_lsda is true, this is the encoding to be used for language- |
michael@0 | 717 | // specific data area pointers in FDEs. |
michael@0 | 718 | DwarfPointerEncoding lsda_encoding; |
michael@0 | 719 | |
michael@0 | 720 | // If has_z_personality is true, this is the encoding used for the |
michael@0 | 721 | // personality routine pointer in the augmentation data. |
michael@0 | 722 | DwarfPointerEncoding personality_encoding; |
michael@0 | 723 | |
michael@0 | 724 | // If has_z_personality is true, this is the address of the personality |
michael@0 | 725 | // routine --- or, if personality_encoding & DW_EH_PE_indirect, the |
michael@0 | 726 | // address where the personality routine's address is stored. |
michael@0 | 727 | uint64 personality_address; |
michael@0 | 728 | |
michael@0 | 729 | // This is the encoding used for addresses in the FDE header and |
michael@0 | 730 | // in DW_CFA_set_loc instructions. This is always valid, whether |
michael@0 | 731 | // or not we saw a 'z' augmentation string; its default value is |
michael@0 | 732 | // DW_EH_PE_absptr, which is what normal DWARF CFI uses. |
michael@0 | 733 | DwarfPointerEncoding pointer_encoding; |
michael@0 | 734 | }; |
michael@0 | 735 | |
michael@0 | 736 | // A frame description entry (FDE). |
michael@0 | 737 | struct FDE: public Entry { |
michael@0 | 738 | uint64 address; // start address of described code |
michael@0 | 739 | uint64 size; // size of described code, in bytes |
michael@0 | 740 | |
michael@0 | 741 | // If cie->has_z_lsda is true, then this is the language-specific data |
michael@0 | 742 | // area's address --- or its address's address, if cie->lsda_encoding |
michael@0 | 743 | // has the DW_EH_PE_indirect bit set. |
michael@0 | 744 | uint64 lsda_address; |
michael@0 | 745 | }; |
michael@0 | 746 | |
michael@0 | 747 | // Internal use. |
michael@0 | 748 | class Rule; |
michael@0 | 749 | class UndefinedRule; |
michael@0 | 750 | class SameValueRule; |
michael@0 | 751 | class OffsetRule; |
michael@0 | 752 | class ValOffsetRule; |
michael@0 | 753 | class RegisterRule; |
michael@0 | 754 | class ExpressionRule; |
michael@0 | 755 | class ValExpressionRule; |
michael@0 | 756 | class RuleMap; |
michael@0 | 757 | class State; |
michael@0 | 758 | |
michael@0 | 759 | // Parse the initial length and id of a CFI entry, either a CIE, an FDE, |
michael@0 | 760 | // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the |
michael@0 | 761 | // data to parse. On success, populate ENTRY as appropriate, and return |
michael@0 | 762 | // true. On failure, report the problem, and return false. Even if we |
michael@0 | 763 | // return false, set ENTRY->end to the first byte after the entry if we |
michael@0 | 764 | // were able to figure that out, or NULL if we weren't. |
michael@0 | 765 | bool ReadEntryPrologue(const char *cursor, Entry *entry); |
michael@0 | 766 | |
michael@0 | 767 | // Parse the fields of a CIE after the entry prologue, including any 'z' |
michael@0 | 768 | // augmentation data. Assume that the 'Entry' fields of CIE are |
michael@0 | 769 | // populated; use CIE->fields and CIE->end as the start and limit for |
michael@0 | 770 | // parsing. On success, populate the rest of *CIE, and return true; on |
michael@0 | 771 | // failure, report the problem and return false. |
michael@0 | 772 | bool ReadCIEFields(CIE *cie); |
michael@0 | 773 | |
michael@0 | 774 | // Parse the fields of an FDE after the entry prologue, including any 'z' |
michael@0 | 775 | // augmentation data. Assume that the 'Entry' fields of *FDE are |
michael@0 | 776 | // initialized; use FDE->fields and FDE->end as the start and limit for |
michael@0 | 777 | // parsing. Assume that FDE->cie is fully initialized. On success, |
michael@0 | 778 | // populate the rest of *FDE, and return true; on failure, report the |
michael@0 | 779 | // problem and return false. |
michael@0 | 780 | bool ReadFDEFields(FDE *fde); |
michael@0 | 781 | |
michael@0 | 782 | // Report that ENTRY is incomplete, and return false. This is just a |
michael@0 | 783 | // trivial wrapper for invoking reporter_->Incomplete; it provides a |
michael@0 | 784 | // little brevity. |
michael@0 | 785 | bool ReportIncomplete(Entry *entry); |
michael@0 | 786 | |
michael@0 | 787 | // Return true if ENCODING has the DW_EH_PE_indirect bit set. |
michael@0 | 788 | static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { |
michael@0 | 789 | return encoding & DW_EH_PE_indirect; |
michael@0 | 790 | } |
michael@0 | 791 | |
michael@0 | 792 | // The contents of the .debug_frame or .eh_frame section we're parsing. |
michael@0 | 793 | const char *buffer_; |
michael@0 | 794 | size_t buffer_length_; |
michael@0 | 795 | |
michael@0 | 796 | // For reading multi-byte values with the appropriate endianness. |
michael@0 | 797 | ByteReader *reader_; |
michael@0 | 798 | |
michael@0 | 799 | // The handler to which we should report the data we find. |
michael@0 | 800 | Handler *handler_; |
michael@0 | 801 | |
michael@0 | 802 | // For reporting problems in the info we're parsing. |
michael@0 | 803 | Reporter *reporter_; |
michael@0 | 804 | |
michael@0 | 805 | // True if we are processing .eh_frame-format data. |
michael@0 | 806 | bool eh_frame_; |
michael@0 | 807 | }; |
michael@0 | 808 | |
michael@0 | 809 | // The handler class for CallFrameInfo. A CFI parser calls the |
michael@0 | 810 | // member functions of a handler object to report the data it finds. |
michael@0 | 811 | class CallFrameInfo::Handler { |
michael@0 | 812 | public: |
michael@0 | 813 | // The pseudo-register number for the canonical frame address. |
michael@0 | 814 | enum { kCFARegister = -1 }; |
michael@0 | 815 | |
michael@0 | 816 | Handler() { } |
michael@0 | 817 | virtual ~Handler() { } |
michael@0 | 818 | |
michael@0 | 819 | // The parser has found CFI for the machine code at ADDRESS, |
michael@0 | 820 | // extending for LENGTH bytes. OFFSET is the offset of the frame |
michael@0 | 821 | // description entry in the section, for use in error messages. |
michael@0 | 822 | // VERSION is the version number of the CFI format. AUGMENTATION is |
michael@0 | 823 | // a string describing any producer-specific extensions present in |
michael@0 | 824 | // the data. RETURN_ADDRESS is the number of the register that holds |
michael@0 | 825 | // the address to which the function should return. |
michael@0 | 826 | // |
michael@0 | 827 | // Entry should return true to process this CFI, or false to skip to |
michael@0 | 828 | // the next entry. |
michael@0 | 829 | // |
michael@0 | 830 | // The parser invokes Entry for each Frame Description Entry (FDE) |
michael@0 | 831 | // it finds. The parser doesn't report Common Information Entries |
michael@0 | 832 | // to the handler explicitly; instead, if the handler elects to |
michael@0 | 833 | // process a given FDE, the parser reiterates the appropriate CIE's |
michael@0 | 834 | // contents at the beginning of the FDE's rules. |
michael@0 | 835 | virtual bool Entry(size_t offset, uint64 address, uint64 length, |
michael@0 | 836 | uint8 version, const string &augmentation, |
michael@0 | 837 | unsigned return_address) = 0; |
michael@0 | 838 | |
michael@0 | 839 | // When the Entry function returns true, the parser calls these |
michael@0 | 840 | // handler functions repeatedly to describe the rules for recovering |
michael@0 | 841 | // registers at each instruction in the given range of machine code. |
michael@0 | 842 | // Immediately after a call to Entry, the handler should assume that |
michael@0 | 843 | // the rule for each callee-saves register is "unchanged" --- that |
michael@0 | 844 | // is, that the register still has the value it had in the caller. |
michael@0 | 845 | // |
michael@0 | 846 | // If a *Rule function returns true, we continue processing this entry's |
michael@0 | 847 | // instructions. If a *Rule function returns false, we stop evaluating |
michael@0 | 848 | // instructions, and skip to the next entry. Either way, we call End |
michael@0 | 849 | // before going on to the next entry. |
michael@0 | 850 | // |
michael@0 | 851 | // In all of these functions, if the REG parameter is kCFARegister, then |
michael@0 | 852 | // the rule describes how to find the canonical frame address. |
michael@0 | 853 | // kCFARegister may be passed as a BASE_REGISTER argument, meaning that |
michael@0 | 854 | // the canonical frame address should be used as the base address for the |
michael@0 | 855 | // computation. All other REG values will be non-negative. |
michael@0 | 856 | |
michael@0 | 857 | // At ADDRESS, register REG's value is not recoverable. |
michael@0 | 858 | virtual bool UndefinedRule(uint64 address, int reg) = 0; |
michael@0 | 859 | |
michael@0 | 860 | // At ADDRESS, register REG's value is the same as that it had in |
michael@0 | 861 | // the caller. |
michael@0 | 862 | virtual bool SameValueRule(uint64 address, int reg) = 0; |
michael@0 | 863 | |
michael@0 | 864 | // At ADDRESS, register REG has been saved at offset OFFSET from |
michael@0 | 865 | // BASE_REGISTER. |
michael@0 | 866 | virtual bool OffsetRule(uint64 address, int reg, |
michael@0 | 867 | int base_register, long offset) = 0; |
michael@0 | 868 | |
michael@0 | 869 | // At ADDRESS, the caller's value of register REG is the current |
michael@0 | 870 | // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an |
michael@0 | 871 | // address at which the register's value is saved.) |
michael@0 | 872 | virtual bool ValOffsetRule(uint64 address, int reg, |
michael@0 | 873 | int base_register, long offset) = 0; |
michael@0 | 874 | |
michael@0 | 875 | // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs |
michael@0 | 876 | // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that |
michael@0 | 877 | // BASE_REGISTER is the "home" for REG's saved value: if you want to |
michael@0 | 878 | // assign to a variable whose home is REG in the calling frame, you |
michael@0 | 879 | // should put the value in BASE_REGISTER. |
michael@0 | 880 | virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; |
michael@0 | 881 | |
michael@0 | 882 | // At ADDRESS, the DWARF expression EXPRESSION yields the address at |
michael@0 | 883 | // which REG was saved. |
michael@0 | 884 | virtual bool ExpressionRule(uint64 address, int reg, |
michael@0 | 885 | const string &expression) = 0; |
michael@0 | 886 | |
michael@0 | 887 | // At ADDRESS, the DWARF expression EXPRESSION yields the caller's |
michael@0 | 888 | // value for REG. (This rule doesn't provide an address at which the |
michael@0 | 889 | // register's value is saved.) |
michael@0 | 890 | virtual bool ValExpressionRule(uint64 address, int reg, |
michael@0 | 891 | const string &expression) = 0; |
michael@0 | 892 | |
michael@0 | 893 | // Indicate that the rules for the address range reported by the |
michael@0 | 894 | // last call to Entry are complete. End should return true if |
michael@0 | 895 | // everything is okay, or false if an error has occurred and parsing |
michael@0 | 896 | // should stop. |
michael@0 | 897 | virtual bool End() = 0; |
michael@0 | 898 | |
michael@0 | 899 | // Handler functions for Linux C++ exception handling data. These are |
michael@0 | 900 | // only called if the data includes 'z' augmentation strings. |
michael@0 | 901 | |
michael@0 | 902 | // The Linux C++ ABI uses an extension of the DWARF CFI format to |
michael@0 | 903 | // walk the stack to propagate exceptions from the throw to the |
michael@0 | 904 | // appropriate catch, and do the appropriate cleanups along the way. |
michael@0 | 905 | // CFI entries used for exception handling have two additional pieces |
michael@0 | 906 | // of data associated with them: |
michael@0 | 907 | // |
michael@0 | 908 | // - The "language-specific data area" describes which exception |
michael@0 | 909 | // types the function has 'catch' clauses for, and indicates how |
michael@0 | 910 | // to go about re-entering the function at the appropriate catch |
michael@0 | 911 | // clause. If the exception is not caught, it describes the |
michael@0 | 912 | // destructors that must run before the frame is popped. |
michael@0 | 913 | // |
michael@0 | 914 | // - The "personality routine" is responsible for interpreting the |
michael@0 | 915 | // language-specific data area's contents, and deciding whether |
michael@0 | 916 | // the exception should continue to propagate down the stack, |
michael@0 | 917 | // perhaps after doing some cleanup for this frame, or whether the |
michael@0 | 918 | // exception will be caught here. |
michael@0 | 919 | // |
michael@0 | 920 | // In principle, the language-specific data area is opaque to |
michael@0 | 921 | // everybody but the personality routine. In practice, these values |
michael@0 | 922 | // may be useful or interesting to readers with extra context, and |
michael@0 | 923 | // we have to at least skip them anyway, so we might as well report |
michael@0 | 924 | // them to the handler. |
michael@0 | 925 | |
michael@0 | 926 | // This entry's exception handling personality routine's address is |
michael@0 | 927 | // ADDRESS. If INDIRECT is true, then ADDRESS is the address at |
michael@0 | 928 | // which the routine's address is stored. The default definition for |
michael@0 | 929 | // this handler function simply returns true, allowing parsing of |
michael@0 | 930 | // the entry to continue. |
michael@0 | 931 | virtual bool PersonalityRoutine(uint64 address, bool indirect) { |
michael@0 | 932 | return true; |
michael@0 | 933 | } |
michael@0 | 934 | |
michael@0 | 935 | // This entry's language-specific data area (LSDA) is located at |
michael@0 | 936 | // ADDRESS. If INDIRECT is true, then ADDRESS is the address at |
michael@0 | 937 | // which the area's address is stored. The default definition for |
michael@0 | 938 | // this handler function simply returns true, allowing parsing of |
michael@0 | 939 | // the entry to continue. |
michael@0 | 940 | virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { |
michael@0 | 941 | return true; |
michael@0 | 942 | } |
michael@0 | 943 | |
michael@0 | 944 | // This entry describes a signal trampoline --- this frame is the |
michael@0 | 945 | // caller of a signal handler. The default definition for this |
michael@0 | 946 | // handler function simply returns true, allowing parsing of the |
michael@0 | 947 | // entry to continue. |
michael@0 | 948 | // |
michael@0 | 949 | // The best description of the rationale for and meaning of signal |
michael@0 | 950 | // trampoline CFI entries seems to be in the GCC bug database: |
michael@0 | 951 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 |
michael@0 | 952 | virtual bool SignalHandler() { return true; } |
michael@0 | 953 | }; |
michael@0 | 954 | |
michael@0 | 955 | // The CallFrameInfo class makes calls on an instance of this class to |
michael@0 | 956 | // report errors or warn about problems in the data it is parsing. The |
michael@0 | 957 | // default definitions of these methods print a message to stderr, but |
michael@0 | 958 | // you can make a derived class that overrides them. |
michael@0 | 959 | class CallFrameInfo::Reporter { |
michael@0 | 960 | public: |
michael@0 | 961 | // Create an error reporter which attributes troubles to the section |
michael@0 | 962 | // named SECTION in FILENAME. |
michael@0 | 963 | // |
michael@0 | 964 | // Normally SECTION would be .debug_frame, but the Mac puts CFI data |
michael@0 | 965 | // in a Mach-O section named __debug_frame. If we support |
michael@0 | 966 | // Linux-style exception handling data, we could be reading an |
michael@0 | 967 | // .eh_frame section. |
michael@0 | 968 | Reporter(const string &filename, |
michael@0 | 969 | const string &section = ".debug_frame") |
michael@0 | 970 | : filename_(filename), section_(section) { } |
michael@0 | 971 | virtual ~Reporter() { } |
michael@0 | 972 | |
michael@0 | 973 | // The CFI entry at OFFSET ends too early to be well-formed. KIND |
michael@0 | 974 | // indicates what kind of entry it is; KIND can be kUnknown if we |
michael@0 | 975 | // haven't parsed enough of the entry to tell yet. |
michael@0 | 976 | virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); |
michael@0 | 977 | |
michael@0 | 978 | // The .eh_frame data has a four-byte zero at OFFSET where the next |
michael@0 | 979 | // entry's length would be; this is a terminator. However, the buffer |
michael@0 | 980 | // length as given to the CallFrameInfo constructor says there should be |
michael@0 | 981 | // more data. |
michael@0 | 982 | virtual void EarlyEHTerminator(uint64 offset); |
michael@0 | 983 | |
michael@0 | 984 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the |
michael@0 | 985 | // section is not that large. |
michael@0 | 986 | virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); |
michael@0 | 987 | |
michael@0 | 988 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry |
michael@0 | 989 | // there is not a CIE. |
michael@0 | 990 | virtual void BadCIEId(uint64 offset, uint64 cie_offset); |
michael@0 | 991 | |
michael@0 | 992 | // The FDE at OFFSET refers to a CIE with version number VERSION, |
michael@0 | 993 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
michael@0 | 994 | // a version number we don't recognize. |
michael@0 | 995 | virtual void UnrecognizedVersion(uint64 offset, int version); |
michael@0 | 996 | |
michael@0 | 997 | // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, |
michael@0 | 998 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
michael@0 | 999 | // augmentations we don't recognize. |
michael@0 | 1000 | virtual void UnrecognizedAugmentation(uint64 offset, |
michael@0 | 1001 | const string &augmentation); |
michael@0 | 1002 | |
michael@0 | 1003 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not |
michael@0 | 1004 | // a valid encoding. |
michael@0 | 1005 | virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); |
michael@0 | 1006 | |
michael@0 | 1007 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends |
michael@0 | 1008 | // on a base address which has not been supplied. |
michael@0 | 1009 | virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); |
michael@0 | 1010 | |
michael@0 | 1011 | // The CIE at OFFSET contains a DW_CFA_restore instruction at |
michael@0 | 1012 | // INSN_OFFSET, which may not appear in a CIE. |
michael@0 | 1013 | virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); |
michael@0 | 1014 | |
michael@0 | 1015 | // The entry at OFFSET, of kind KIND, has an unrecognized |
michael@0 | 1016 | // instruction at INSN_OFFSET. |
michael@0 | 1017 | virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1018 | uint64 insn_offset); |
michael@0 | 1019 | |
michael@0 | 1020 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
michael@0 | 1021 | // KIND, establishes a rule that cites the CFA, but we have not |
michael@0 | 1022 | // established a CFA rule yet. |
michael@0 | 1023 | virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1024 | uint64 insn_offset); |
michael@0 | 1025 | |
michael@0 | 1026 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
michael@0 | 1027 | // KIND, is a DW_CFA_restore_state instruction, but the stack of |
michael@0 | 1028 | // saved states is empty. |
michael@0 | 1029 | virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1030 | uint64 insn_offset); |
michael@0 | 1031 | |
michael@0 | 1032 | // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry |
michael@0 | 1033 | // at OFFSET, of kind KIND, would restore a state that has no CFA |
michael@0 | 1034 | // rule, whereas the current state does have a CFA rule. This is |
michael@0 | 1035 | // bogus input, which the CallFrameInfo::Handler interface doesn't |
michael@0 | 1036 | // (and shouldn't) have any way to report. |
michael@0 | 1037 | virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1038 | uint64 insn_offset); |
michael@0 | 1039 | |
michael@0 | 1040 | protected: |
michael@0 | 1041 | // The name of the file whose CFI we're reading. |
michael@0 | 1042 | string filename_; |
michael@0 | 1043 | |
michael@0 | 1044 | // The name of the CFI section in that file. |
michael@0 | 1045 | string section_; |
michael@0 | 1046 | }; |
michael@0 | 1047 | |
michael@0 | 1048 | } // namespace dwarf2reader |
michael@0 | 1049 | |
michael@0 | 1050 | #endif // UTIL_DEBUGINFO_DWARF2READER_H__ |