Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
michael@0 | 3 | |
michael@0 | 4 | // Copyright 2006, 2010 Google Inc. All Rights Reserved. |
michael@0 | 5 | // |
michael@0 | 6 | // Redistribution and use in source and binary forms, with or without |
michael@0 | 7 | // modification, are permitted provided that the following conditions are |
michael@0 | 8 | // met: |
michael@0 | 9 | // |
michael@0 | 10 | // * Redistributions of source code must retain the above copyright |
michael@0 | 11 | // notice, this list of conditions and the following disclaimer. |
michael@0 | 12 | // * Redistributions in binary form must reproduce the above |
michael@0 | 13 | // copyright notice, this list of conditions and the following disclaimer |
michael@0 | 14 | // in the documentation and/or other materials provided with the |
michael@0 | 15 | // distribution. |
michael@0 | 16 | // * Neither the name of Google Inc. nor the names of its |
michael@0 | 17 | // contributors may be used to endorse or promote products derived from |
michael@0 | 18 | // this software without specific prior written permission. |
michael@0 | 19 | // |
michael@0 | 20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
michael@0 | 21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
michael@0 | 22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
michael@0 | 23 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
michael@0 | 24 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
michael@0 | 25 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
michael@0 | 26 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
michael@0 | 27 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
michael@0 | 28 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 29 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 30 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 31 | |
michael@0 | 32 | // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
michael@0 | 33 | |
michael@0 | 34 | // This file is derived from the following files in |
michael@0 | 35 | // toolkit/crashreporter/google-breakpad: |
michael@0 | 36 | // src/common/dwarf/types.h |
michael@0 | 37 | // src/common/dwarf/dwarf2enums.h |
michael@0 | 38 | // src/common/dwarf/bytereader.h |
michael@0 | 39 | // src/common/dwarf_cfi_to_module.h |
michael@0 | 40 | // src/common/dwarf/dwarf2reader.h |
michael@0 | 41 | |
michael@0 | 42 | #ifndef LulDwarfExt_h |
michael@0 | 43 | #define LulDwarfExt_h |
michael@0 | 44 | |
michael@0 | 45 | #include <stdint.h> |
michael@0 | 46 | |
michael@0 | 47 | #include "mozilla/Assertions.h" |
michael@0 | 48 | |
michael@0 | 49 | #include "LulDwarfSummariser.h" |
michael@0 | 50 | |
// Shorthand integer type names, named after their intended bit widths.
// Signed and unsigned variants are paired, narrowest first.
typedef signed char        int8;
typedef unsigned char      uint8;
typedef short              int16;
typedef unsigned short     uint16;
typedef int                int32;
typedef unsigned int       uint32;
typedef long long          int64;
typedef unsigned long long uint64;

// Pointer-sized integers, built from the compiler-provided
// __PTRDIFF_TYPE__ macro.  Compilation stops with an error when the
// macro is not defined.
#if !defined(__PTRDIFF_TYPE__)
#error "Can't find pointer-sized integral types."
#else
typedef __PTRDIFF_TYPE__ intptr;
typedef unsigned __PTRDIFF_TYPE__ uintptr;
#endif
michael@0 | 67 | |
michael@0 | 68 | |
michael@0 | 69 | namespace lul { |
michael@0 | 70 | |
// Pointer formats used by exception handling frame descriptions.  See
// the Linux Standard Base Core Specification 4.0, section 11.5, "DWARF
// Extensions".  Enumerators are listed in ascending numeric order.
enum DwarfPointerEncoding
{
  // Low-nybble values without the DW_EH_PE_signed bit.
  DW_EH_PE_absptr   = 0x00,
  DW_EH_PE_uleb128  = 0x01,
  DW_EH_PE_udata2   = 0x02,
  DW_EH_PE_udata4   = 0x03,
  DW_EH_PE_udata8   = 0x04,

  // Also defined by the GNU toolchain sources, purely as an aid for
  // sorting the lower nybble values into signed and unsigned groups.
  DW_EH_PE_signed   = 0x08,

  // Low-nybble values with the DW_EH_PE_signed bit.
  DW_EH_PE_sleb128  = 0x09,
  DW_EH_PE_sdata2   = 0x0A,
  DW_EH_PE_sdata4   = 0x0B,
  DW_EH_PE_sdata8   = 0x0C,

  // High-nybble values.
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_textrel  = 0x20,
  DW_EH_PE_datarel  = 0x30,
  DW_EH_PE_funcrel  = 0x40,
  DW_EH_PE_aligned  = 0x50,

  // Not documented in LSB 4.0, but used by both the Linux and OS X
  // toolchains.  It may be combined with any other encoding except
  // DW_EH_PE_aligned, and means that the encoded value is the address
  // at which the true address is stored, rather than the true address
  // itself.
  DW_EH_PE_indirect = 0x80,

  DW_EH_PE_omit     = 0xff
};
michael@0 | 104 | |
michael@0 | 105 | |
// Byte order selector.  The more obvious names LITTLE_ENDIAN and
// BIG_ENDIAN are avoided because they conflict with a macro.
enum Endianness {
  ENDIANNESS_BIG = 0,
  ENDIANNESS_LITTLE = 1
};
michael@0 | 112 | |
michael@0 | 113 | // A ByteReader knows how to read single- and multi-byte values of |
michael@0 | 114 | // various endiannesses, sizes, and encodings, as used in DWARF |
michael@0 | 115 | // debugging information and Linux C++ exception handling data. |
michael@0 | 116 | class ByteReader { |
michael@0 | 117 | public: |
michael@0 | 118 | // Construct a ByteReader capable of reading one-, two-, four-, and |
michael@0 | 119 | // eight-byte values according to ENDIANNESS, absolute machine-sized |
michael@0 | 120 | // addresses, DWARF-style "initial length" values, signed and |
michael@0 | 121 | // unsigned LEB128 numbers, and Linux C++ exception handling data's |
michael@0 | 122 | // encoded pointers. |
michael@0 | 123 | explicit ByteReader(enum Endianness endianness); |
michael@0 | 124 | virtual ~ByteReader(); |
michael@0 | 125 | |
michael@0 | 126 | // Read a single byte from BUFFER and return it as an unsigned 8 bit |
michael@0 | 127 | // number. |
michael@0 | 128 | uint8 ReadOneByte(const char* buffer) const; |
michael@0 | 129 | |
michael@0 | 130 | // Read two bytes from BUFFER and return them as an unsigned 16 bit |
michael@0 | 131 | // number, using this ByteReader's endianness. |
michael@0 | 132 | uint16 ReadTwoBytes(const char* buffer) const; |
michael@0 | 133 | |
michael@0 | 134 | // Read four bytes from BUFFER and return them as an unsigned 32 bit |
michael@0 | 135 | // number, using this ByteReader's endianness. This function returns |
michael@0 | 136 | // a uint64 so that it is compatible with ReadAddress and |
michael@0 | 137 | // ReadOffset. The number it returns will never be outside the range |
michael@0 | 138 | // of an unsigned 32 bit integer. |
michael@0 | 139 | uint64 ReadFourBytes(const char* buffer) const; |
michael@0 | 140 | |
michael@0 | 141 | // Read eight bytes from BUFFER and return them as an unsigned 64 |
michael@0 | 142 | // bit number, using this ByteReader's endianness. |
michael@0 | 143 | uint64 ReadEightBytes(const char* buffer) const; |
michael@0 | 144 | |
michael@0 | 145 | // Read an unsigned LEB128 (Little Endian Base 128) number from |
michael@0 | 146 | // BUFFER and return it as an unsigned 64 bit integer. Set LEN to |
michael@0 | 147 | // the number of bytes read. |
michael@0 | 148 | // |
michael@0 | 149 | // The unsigned LEB128 representation of an integer N is a variable |
michael@0 | 150 | // number of bytes: |
michael@0 | 151 | // |
michael@0 | 152 | // - If N is between 0 and 0x7f, then its unsigned LEB128 |
michael@0 | 153 | // representation is a single byte whose value is N. |
michael@0 | 154 | // |
michael@0 | 155 | // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | |
michael@0 | 156 | // 0x80, followed by the unsigned LEB128 representation of N / |
michael@0 | 157 | // 128, rounded towards negative infinity. |
michael@0 | 158 | // |
michael@0 | 159 | // In other words, we break VALUE into groups of seven bits, put |
michael@0 | 160 | // them in little-endian order, and then write them as eight-bit |
michael@0 | 161 | // bytes with the high bit on all but the last. |
michael@0 | 162 | uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; |
michael@0 | 163 | |
michael@0 | 164 | // Read a signed LEB128 number from BUFFER and return it as an |
michael@0 | 165 | // signed 64 bit integer. Set LEN to the number of bytes read. |
michael@0 | 166 | // |
michael@0 | 167 | // The signed LEB128 representation of an integer N is a variable |
michael@0 | 168 | // number of bytes: |
michael@0 | 169 | // |
michael@0 | 170 | // - If N is between -0x40 and 0x3f, then its signed LEB128 |
michael@0 | 171 | // representation is a single byte whose value is N in two's |
michael@0 | 172 | // complement. |
michael@0 | 173 | // |
michael@0 | 174 | // - Otherwise, its signed LEB128 representation is (N & 0x7f) | |
michael@0 | 175 | // 0x80, followed by the signed LEB128 representation of N / 128, |
michael@0 | 176 | // rounded towards negative infinity. |
michael@0 | 177 | // |
michael@0 | 178 | // In other words, we break VALUE into groups of seven bits, put |
michael@0 | 179 | // them in little-endian order, and then write them as eight-bit |
michael@0 | 180 | // bytes with the high bit on all but the last. |
michael@0 | 181 | int64 ReadSignedLEB128(const char* buffer, size_t* len) const; |
michael@0 | 182 | |
michael@0 | 183 | // Indicate that addresses on this architecture are SIZE bytes long. SIZE |
michael@0 | 184 | // must be either 4 or 8. (DWARF allows addresses to be any number of |
michael@0 | 185 | // bytes in length from 1 to 255, but we only support 32- and 64-bit |
michael@0 | 186 | // addresses at the moment.) You must call this before using the |
michael@0 | 187 | // ReadAddress member function. |
michael@0 | 188 | // |
michael@0 | 189 | // For data in a .debug_info section, or something that .debug_info |
michael@0 | 190 | // refers to like line number or macro data, the compilation unit |
michael@0 | 191 | // header's address_size field indicates the address size to use. Call |
michael@0 | 192 | // frame information doesn't indicate its address size (a shortcoming of |
michael@0 | 193 | // the spec); you must supply the appropriate size based on the |
michael@0 | 194 | // architecture of the target machine. |
michael@0 | 195 | void SetAddressSize(uint8 size); |
michael@0 | 196 | |
michael@0 | 197 | // Return the current address size, in bytes. This is either 4, |
michael@0 | 198 | // indicating 32-bit addresses, or 8, indicating 64-bit addresses. |
michael@0 | 199 | uint8 AddressSize() const { return address_size_; } |
michael@0 | 200 | |
michael@0 | 201 | // Read an address from BUFFER and return it as an unsigned 64 bit |
michael@0 | 202 | // integer, respecting this ByteReader's endianness and address size. You |
michael@0 | 203 | // must call SetAddressSize before calling this function. |
michael@0 | 204 | uint64 ReadAddress(const char* buffer) const; |
michael@0 | 205 | |
michael@0 | 206 | // DWARF actually defines two slightly different formats: 32-bit DWARF |
michael@0 | 207 | // and 64-bit DWARF. This is *not* related to the size of registers or |
michael@0 | 208 | // addresses on the target machine; it refers only to the size of section |
michael@0 | 209 | // offsets and data lengths appearing in the DWARF data. One only needs |
michael@0 | 210 | // 64-bit DWARF when the debugging data itself is larger than 4GiB. |
michael@0 | 211 | // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the |
michael@0 | 212 | // debugging data itself is very large. |
michael@0 | 213 | // |
michael@0 | 214 | // DWARF information identifies itself as 32-bit or 64-bit DWARF: each |
michael@0 | 215 | // compilation unit and call frame information entry begins with an |
michael@0 | 216 | // "initial length" field, which, in addition to giving the length of the |
michael@0 | 217 | // data, also indicates the size of section offsets and lengths appearing |
michael@0 | 218 | // in that data. The ReadInitialLength member function, below, reads an |
michael@0 | 219 | // initial length and sets the ByteReader's offset size as a side effect. |
michael@0 | 220 | // Thus, in the normal process of reading DWARF data, the appropriate |
michael@0 | 221 | // offset size is set automatically. So, you should only need to call |
michael@0 | 222 | // SetOffsetSize if you are using the same ByteReader to jump from the |
michael@0 | 223 | // midst of one block of DWARF data into another. |
michael@0 | 224 | |
michael@0 | 225 | // Read a DWARF "initial length" field from START, and return it as |
michael@0 | 226 | // an unsigned 64 bit integer, respecting this ByteReader's |
michael@0 | 227 | // endianness. Set *LEN to the length of the initial length in |
michael@0 | 228 | // bytes, either four or twelve. As a side effect, set this |
michael@0 | 229 | // ByteReader's offset size to either 4 (if we see a 32-bit DWARF |
michael@0 | 230 | // initial length) or 8 (if we see a 64-bit DWARF initial length). |
michael@0 | 231 | // |
michael@0 | 232 | // A DWARF initial length is either: |
michael@0 | 233 | // |
michael@0 | 234 | // - a byte count stored as an unsigned 32-bit value less than |
michael@0 | 235 | // 0xffffff00, indicating that the data whose length is being |
michael@0 | 236 | // measured uses the 32-bit DWARF format, or |
michael@0 | 237 | // |
michael@0 | 238 | // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, |
michael@0 | 239 | // indicating that the data whose length is being measured uses |
michael@0 | 240 | // the 64-bit DWARF format. |
michael@0 | 241 | uint64 ReadInitialLength(const char* start, size_t* len); |
michael@0 | 242 | |
michael@0 | 243 | // Read an offset from BUFFER and return it as an unsigned 64 bit |
michael@0 | 244 | // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the |
michael@0 | 245 | // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes |
michael@0 | 246 | // long. You must call ReadInitialLength or SetOffsetSize before calling |
michael@0 | 247 | // this function; see the comments above for details. |
michael@0 | 248 | uint64 ReadOffset(const char* buffer) const; |
michael@0 | 249 | |
michael@0 | 250 | // Return the current offset size, in bytes. |
michael@0 | 251 | // A return value of 4 indicates that we are reading 32-bit DWARF. |
michael@0 | 252 | // A return value of 8 indicates that we are reading 64-bit DWARF. |
michael@0 | 253 | uint8 OffsetSize() const { return offset_size_; } |
michael@0 | 254 | |
michael@0 | 255 | // Indicate that section offsets and lengths are SIZE bytes long. SIZE |
michael@0 | 256 | // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). |
michael@0 | 257 | // Usually, you should not call this function yourself; instead, let a |
michael@0 | 258 | // call to ReadInitialLength establish the data's offset size |
michael@0 | 259 | // automatically. |
michael@0 | 260 | void SetOffsetSize(uint8 size); |
michael@0 | 261 | |
michael@0 | 262 | // The Linux C++ ABI uses a variant of DWARF call frame information |
michael@0 | 263 | // for exception handling. This data is included in the program's |
michael@0 | 264 | // address space as the ".eh_frame" section, and intepreted at |
michael@0 | 265 | // runtime to walk the stack, find exception handlers, and run |
michael@0 | 266 | // cleanup code. The format is mostly the same as DWARF CFI, with |
michael@0 | 267 | // some adjustments made to provide the additional |
michael@0 | 268 | // exception-handling data, and to make the data easier to work with |
michael@0 | 269 | // in memory --- for example, to allow it to be placed in read-only |
michael@0 | 270 | // memory even when describing position-independent code. |
michael@0 | 271 | // |
michael@0 | 272 | // In particular, exception handling data can select a number of |
michael@0 | 273 | // different encodings for pointers that appear in the data, as |
michael@0 | 274 | // described by the DwarfPointerEncoding enum. There are actually |
michael@0 | 275 | // four axes(!) to the encoding: |
michael@0 | 276 | // |
michael@0 | 277 | // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use |
michael@0 | 278 | // the DWARF LEB128 encoding. |
michael@0 | 279 | // |
michael@0 | 280 | // - The pointer's signedness: pointers can be signed or unsigned. |
michael@0 | 281 | // |
michael@0 | 282 | // - The pointer's base address: the data stored in the exception |
michael@0 | 283 | // handling data can be the actual address (that is, an absolute |
michael@0 | 284 | // pointer), or relative to one of a number of different base |
michael@0 | 285 | // addreses --- including that of the encoded pointer itself, for |
michael@0 | 286 | // a form of "pc-relative" addressing. |
michael@0 | 287 | // |
michael@0 | 288 | // - The pointer may be indirect: it may be the address where the |
michael@0 | 289 | // true pointer is stored. (This is used to refer to things via |
michael@0 | 290 | // global offset table entries, program linkage table entries, or |
michael@0 | 291 | // other tricks used in position-independent code.) |
michael@0 | 292 | // |
michael@0 | 293 | // There are also two options that fall outside that matrix |
michael@0 | 294 | // altogether: the pointer may be omitted, or it may have padding to |
michael@0 | 295 | // align it on an appropriate address boundary. (That last option |
michael@0 | 296 | // may seem like it should be just another axis, but it is not.) |
michael@0 | 297 | |
michael@0 | 298 | // Indicate that the exception handling data is loaded starting at |
michael@0 | 299 | // SECTION_BASE, and that the start of its buffer in our own memory |
michael@0 | 300 | // is BUFFER_BASE. This allows us to find the address that a given |
michael@0 | 301 | // byte in our buffer would have when loaded into the program the |
michael@0 | 302 | // data describes. We need this to resolve DW_EH_PE_pcrel pointers. |
michael@0 | 303 | void SetCFIDataBase(uint64 section_base, const char *buffer_base); |
michael@0 | 304 | |
michael@0 | 305 | // Indicate that the base address of the program's ".text" section |
michael@0 | 306 | // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. |
michael@0 | 307 | void SetTextBase(uint64 text_base); |
michael@0 | 308 | |
michael@0 | 309 | // Indicate that the base address for DW_EH_PE_datarel pointers is |
michael@0 | 310 | // DATA_BASE. The proper value depends on the ABI; it is usually the |
michael@0 | 311 | // address of the global offset table, held in a designated register in |
michael@0 | 312 | // position-independent code. You will need to look at the startup code |
michael@0 | 313 | // for the target system to be sure. I tried; my eyes bled. |
michael@0 | 314 | void SetDataBase(uint64 data_base); |
michael@0 | 315 | |
michael@0 | 316 | // Indicate that the base address for the FDE we are processing is |
michael@0 | 317 | // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel |
michael@0 | 318 | // pointers. (This encoding does not seem to be used by the GNU |
michael@0 | 319 | // toolchain.) |
michael@0 | 320 | void SetFunctionBase(uint64 function_base); |
michael@0 | 321 | |
michael@0 | 322 | // Indicate that we are no longer processing any FDE, so any use of |
michael@0 | 323 | // a DW_EH_PE_funcrel encoding is an error. |
michael@0 | 324 | void ClearFunctionBase(); |
michael@0 | 325 | |
michael@0 | 326 | // Return true if ENCODING is a valid pointer encoding. |
michael@0 | 327 | bool ValidEncoding(DwarfPointerEncoding encoding) const; |
michael@0 | 328 | |
michael@0 | 329 | // Return true if we have all the information we need to read a |
michael@0 | 330 | // pointer that uses ENCODING. This checks that the appropriate |
michael@0 | 331 | // SetFooBase function for ENCODING has been called. |
michael@0 | 332 | bool UsableEncoding(DwarfPointerEncoding encoding) const; |
michael@0 | 333 | |
michael@0 | 334 | // Read an encoded pointer from BUFFER using ENCODING; return the |
michael@0 | 335 | // absolute address it represents, and set *LEN to the pointer's |
michael@0 | 336 | // length in bytes, including any padding for aligned pointers. |
michael@0 | 337 | // |
michael@0 | 338 | // This function calls 'abort' if ENCODING is invalid or refers to a |
michael@0 | 339 | // base address this reader hasn't been given, so you should check |
michael@0 | 340 | // with ValidEncoding and UsableEncoding first if you would rather |
michael@0 | 341 | // die in a more helpful way. |
michael@0 | 342 | uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, |
michael@0 | 343 | size_t *len) const; |
michael@0 | 344 | |
michael@0 | 345 | private: |
michael@0 | 346 | |
michael@0 | 347 | // Function pointer type for our address and offset readers. |
michael@0 | 348 | typedef uint64 (ByteReader::*AddressReader)(const char*) const; |
michael@0 | 349 | |
michael@0 | 350 | // Read an offset from BUFFER and return it as an unsigned 64 bit |
michael@0 | 351 | // integer. DWARF2/3 define offsets as either 4 or 8 bytes, |
michael@0 | 352 | // generally depending on the amount of DWARF2/3 info present. |
michael@0 | 353 | // This function pointer gets set by SetOffsetSize. |
michael@0 | 354 | AddressReader offset_reader_; |
michael@0 | 355 | |
michael@0 | 356 | // Read an address from BUFFER and return it as an unsigned 64 bit |
michael@0 | 357 | // integer. DWARF2/3 allow addresses to be any size from 0-255 |
michael@0 | 358 | // bytes currently. Internally we support 4 and 8 byte addresses, |
michael@0 | 359 | // and will CHECK on anything else. |
michael@0 | 360 | // This function pointer gets set by SetAddressSize. |
michael@0 | 361 | AddressReader address_reader_; |
michael@0 | 362 | |
michael@0 | 363 | Endianness endian_; |
michael@0 | 364 | uint8 address_size_; |
michael@0 | 365 | uint8 offset_size_; |
michael@0 | 366 | |
michael@0 | 367 | // Base addresses for Linux C++ exception handling data's encoded pointers. |
michael@0 | 368 | bool have_section_base_, have_text_base_, have_data_base_; |
michael@0 | 369 | bool have_function_base_; |
michael@0 | 370 | uint64 section_base_; |
michael@0 | 371 | uint64 text_base_, data_base_, function_base_; |
michael@0 | 372 | const char *buffer_base_; |
michael@0 | 373 | }; |
michael@0 | 374 | |
michael@0 | 375 | |
michael@0 | 376 | inline uint8 ByteReader::ReadOneByte(const char* buffer) const { |
michael@0 | 377 | return buffer[0]; |
michael@0 | 378 | } |
michael@0 | 379 | |
michael@0 | 380 | inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const { |
michael@0 | 381 | const unsigned char *buffer |
michael@0 | 382 | = reinterpret_cast<const unsigned char *>(signed_buffer); |
michael@0 | 383 | const uint16 buffer0 = buffer[0]; |
michael@0 | 384 | const uint16 buffer1 = buffer[1]; |
michael@0 | 385 | if (endian_ == ENDIANNESS_LITTLE) { |
michael@0 | 386 | return buffer0 | buffer1 << 8; |
michael@0 | 387 | } else { |
michael@0 | 388 | return buffer1 | buffer0 << 8; |
michael@0 | 389 | } |
michael@0 | 390 | } |
michael@0 | 391 | |
michael@0 | 392 | inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const { |
michael@0 | 393 | const unsigned char *buffer |
michael@0 | 394 | = reinterpret_cast<const unsigned char *>(signed_buffer); |
michael@0 | 395 | const uint32 buffer0 = buffer[0]; |
michael@0 | 396 | const uint32 buffer1 = buffer[1]; |
michael@0 | 397 | const uint32 buffer2 = buffer[2]; |
michael@0 | 398 | const uint32 buffer3 = buffer[3]; |
michael@0 | 399 | if (endian_ == ENDIANNESS_LITTLE) { |
michael@0 | 400 | return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; |
michael@0 | 401 | } else { |
michael@0 | 402 | return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; |
michael@0 | 403 | } |
michael@0 | 404 | } |
michael@0 | 405 | |
michael@0 | 406 | inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const { |
michael@0 | 407 | const unsigned char *buffer |
michael@0 | 408 | = reinterpret_cast<const unsigned char *>(signed_buffer); |
michael@0 | 409 | const uint64 buffer0 = buffer[0]; |
michael@0 | 410 | const uint64 buffer1 = buffer[1]; |
michael@0 | 411 | const uint64 buffer2 = buffer[2]; |
michael@0 | 412 | const uint64 buffer3 = buffer[3]; |
michael@0 | 413 | const uint64 buffer4 = buffer[4]; |
michael@0 | 414 | const uint64 buffer5 = buffer[5]; |
michael@0 | 415 | const uint64 buffer6 = buffer[6]; |
michael@0 | 416 | const uint64 buffer7 = buffer[7]; |
michael@0 | 417 | if (endian_ == ENDIANNESS_LITTLE) { |
michael@0 | 418 | return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | |
michael@0 | 419 | buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; |
michael@0 | 420 | } else { |
michael@0 | 421 | return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | |
michael@0 | 422 | buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; |
michael@0 | 423 | } |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | // Read an unsigned LEB128 number. Each byte contains 7 bits of |
michael@0 | 427 | // information, plus one bit saying whether the number continues or |
michael@0 | 428 | // not. |
michael@0 | 429 | |
michael@0 | 430 | inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, |
michael@0 | 431 | size_t* len) const { |
michael@0 | 432 | uint64 result = 0; |
michael@0 | 433 | size_t num_read = 0; |
michael@0 | 434 | unsigned int shift = 0; |
michael@0 | 435 | unsigned char byte; |
michael@0 | 436 | |
michael@0 | 437 | do { |
michael@0 | 438 | byte = *buffer++; |
michael@0 | 439 | num_read++; |
michael@0 | 440 | |
michael@0 | 441 | result |= (static_cast<uint64>(byte & 0x7f)) << shift; |
michael@0 | 442 | |
michael@0 | 443 | shift += 7; |
michael@0 | 444 | |
michael@0 | 445 | } while (byte & 0x80); |
michael@0 | 446 | |
michael@0 | 447 | *len = num_read; |
michael@0 | 448 | |
michael@0 | 449 | return result; |
michael@0 | 450 | } |
michael@0 | 451 | |
michael@0 | 452 | // Read a signed LEB128 number. These are like regular LEB128 |
michael@0 | 453 | // numbers, except the last byte may have a sign bit set. |
michael@0 | 454 | |
michael@0 | 455 | inline int64 ByteReader::ReadSignedLEB128(const char* buffer, |
michael@0 | 456 | size_t* len) const { |
michael@0 | 457 | int64 result = 0; |
michael@0 | 458 | unsigned int shift = 0; |
michael@0 | 459 | size_t num_read = 0; |
michael@0 | 460 | unsigned char byte; |
michael@0 | 461 | |
michael@0 | 462 | do { |
michael@0 | 463 | byte = *buffer++; |
michael@0 | 464 | num_read++; |
michael@0 | 465 | result |= (static_cast<uint64>(byte & 0x7f) << shift); |
michael@0 | 466 | shift += 7; |
michael@0 | 467 | } while (byte & 0x80); |
michael@0 | 468 | |
michael@0 | 469 | if ((shift < 8 * sizeof (result)) && (byte & 0x40)) |
michael@0 | 470 | result |= -((static_cast<int64>(1)) << shift); |
michael@0 | 471 | *len = num_read; |
michael@0 | 472 | return result; |
michael@0 | 473 | } |
michael@0 | 474 | |
michael@0 | 475 | inline uint64 ByteReader::ReadOffset(const char* buffer) const { |
michael@0 | 476 | MOZ_ASSERT(this->offset_reader_); |
michael@0 | 477 | return (this->*offset_reader_)(buffer); |
michael@0 | 478 | } |
michael@0 | 479 | |
michael@0 | 480 | inline uint64 ByteReader::ReadAddress(const char* buffer) const { |
michael@0 | 481 | MOZ_ASSERT(this->address_reader_); |
michael@0 | 482 | return (this->*address_reader_)(buffer); |
michael@0 | 483 | } |
michael@0 | 484 | |
michael@0 | 485 | inline void ByteReader::SetCFIDataBase(uint64 section_base, |
michael@0 | 486 | const char *buffer_base) { |
michael@0 | 487 | section_base_ = section_base; |
michael@0 | 488 | buffer_base_ = buffer_base; |
michael@0 | 489 | have_section_base_ = true; |
michael@0 | 490 | } |
michael@0 | 491 | |
michael@0 | 492 | inline void ByteReader::SetTextBase(uint64 text_base) { |
michael@0 | 493 | text_base_ = text_base; |
michael@0 | 494 | have_text_base_ = true; |
michael@0 | 495 | } |
michael@0 | 496 | |
michael@0 | 497 | inline void ByteReader::SetDataBase(uint64 data_base) { |
michael@0 | 498 | data_base_ = data_base; |
michael@0 | 499 | have_data_base_ = true; |
michael@0 | 500 | } |
michael@0 | 501 | |
michael@0 | 502 | inline void ByteReader::SetFunctionBase(uint64 function_base) { |
michael@0 | 503 | function_base_ = function_base; |
michael@0 | 504 | have_function_base_ = true; |
michael@0 | 505 | } |
michael@0 | 506 | |
michael@0 | 507 | inline void ByteReader::ClearFunctionBase() { |
michael@0 | 508 | have_function_base_ = false; |
michael@0 | 509 | } |
michael@0 | 510 | |
michael@0 | 511 | |
michael@0 | 512 | // (derived from) |
michael@0 | 513 | // dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which |
michael@0 | 514 | // accepts parsed DWARF call frame info and adds it to a Summariser object. |
michael@0 | 515 | |
michael@0 | 516 | // This class is a reader for DWARF's Call Frame Information. CFI |
michael@0 | 517 | // describes how to unwind stack frames --- even for functions that do |
michael@0 | 518 | // not follow fixed conventions for saving registers, whose frame size |
michael@0 | 519 | // varies as they execute, etc. |
michael@0 | 520 | // |
michael@0 | 521 | // CFI describes, at each machine instruction, how to compute the |
michael@0 | 522 | // stack frame's base address, how to find the return address, and |
michael@0 | 523 | // where to find the saved values of the caller's registers (if the |
michael@0 | 524 | // callee has stashed them somewhere to free up the registers for its |
michael@0 | 525 | // own use). |
michael@0 | 526 | // |
michael@0 | 527 | // For example, suppose we have a function whose machine code looks |
michael@0 | 528 | // like this (imagine an assembly language that looks like C, for a |
michael@0 | 529 | // machine with 32-bit registers, and a stack that grows towards lower |
michael@0 | 530 | // addresses): |
michael@0 | 531 | // |
michael@0 | 532 | // func: ; entry point; return address at sp |
michael@0 | 533 | // func+0: sp = sp - 16 ; allocate space for stack frame |
michael@0 | 534 | // func+1: sp[12] = r0 ; save r0 at sp+12 |
michael@0 | 535 | // ... ; other code, not frame-related |
michael@0 | 536 | // func+10: sp -= 4; *sp = x ; push some x on the stack |
michael@0 | 537 | // ... ; other code, not frame-related |
michael@0 | 538 | // func+20: r0 = sp[16] ; restore saved r0 |
michael@0 | 539 | // func+21: sp += 20 ; pop whole stack frame |
michael@0 | 540 | // func+22: pc = *sp; sp += 4 ; pop return address and jump to it |
michael@0 | 541 | // |
michael@0 | 542 | // DWARF CFI is (a very compressed representation of) a table with a |
michael@0 | 543 | // row for each machine instruction address and a column for each |
michael@0 | 544 | // register showing how to restore it, if possible. |
michael@0 | 545 | // |
michael@0 | 546 | // A special column named "CFA", for "Canonical Frame Address", tells how |
michael@0 | 547 | // to compute the base address of the frame; registers' entries may |
michael@0 | 548 | // refer to the CFA in describing where the registers are saved. |
michael@0 | 549 | // |
michael@0 | 550 | // Another special column, named "RA", represents the return address. |
michael@0 | 551 | // |
michael@0 | 552 | // For example, here is a complete (uncompressed) table describing the |
michael@0 | 553 | // function above: |
michael@0 | 554 | // |
michael@0 | 555 | //     insn      cfa    r0      r1 ...  ra |
michael@0 | 556 | //     ======================================= |
michael@0 | 557 | //     func+0:   sp                     cfa[0] |
michael@0 | 558 | //     func+1:   sp+16                  cfa[0] |
michael@0 | 559 | //     func+2:   sp+16  cfa[-4]         cfa[0] |
michael@0 | 560 | //     func+11:  sp+20  cfa[-4]         cfa[0] |
michael@0 | 561 | //     func+21:  sp+20                  cfa[0] |
michael@0 | 562 | //     func+22:  sp                     cfa[0] |
michael@0 | 563 | // |
michael@0 | 564 | // Some things to note here: |
michael@0 | 565 | // |
michael@0 | 566 | // - Each row describes the state of affairs *before* executing the |
michael@0 | 567 | // instruction at the given address. Thus, the row for func+0 |
michael@0 | 568 | // describes the state before we allocate the stack frame. In the |
michael@0 | 569 | // next row, the formula for computing the CFA has changed, |
michael@0 | 570 | // reflecting that allocation. |
michael@0 | 571 | // |
michael@0 | 572 | // - The other entries are written in terms of the CFA; this allows |
michael@0 | 573 | // them to remain unchanged as the stack pointer gets bumped around. |
michael@0 | 574 | // For example, the rule for recovering the return address (the "ra" |
michael@0 | 575 | // column) remains unchanged throughout the function, even as the |
michael@0 | 576 | // stack pointer takes on three different offsets from the return |
michael@0 | 577 | // address. |
michael@0 | 578 | // |
michael@0 | 579 | // - Although we haven't shown it, most calling conventions designate |
michael@0 | 580 | // "callee-saves" and "caller-saves" registers. The callee must |
michael@0 | 581 | // preserve the values of callee-saves registers; if it uses them, |
michael@0 | 582 | // it must save their original values somewhere, and restore them |
michael@0 | 583 | // before it returns. In contrast, the callee is free to trash |
michael@0 | 584 | // caller-saves registers; if the callee uses these, it will |
michael@0 | 585 | // probably not bother to save them anywhere, and the CFI will |
michael@0 | 586 | // probably mark their values as "unrecoverable". |
michael@0 | 587 | // |
michael@0 | 588 | // (However, since the caller cannot assume the callee was going to |
michael@0 | 589 | // save them, caller-saves registers are probably dead in the caller |
michael@0 | 590 | // anyway, so compilers usually don't generate CFI for caller-saves |
michael@0 | 591 | // registers.) |
michael@0 | 592 | // |
michael@0 | 593 | // - Exactly where the CFA points is a matter of convention that |
michael@0 | 594 | // depends on the architecture and ABI in use. In the example, the |
michael@0 | 595 | // CFA is the value the stack pointer had upon entry to the |
michael@0 | 596 | // function, pointing at the saved return address. But on the x86, |
michael@0 | 597 | // the call frame information generated by GCC follows the |
michael@0 | 598 | // convention that the CFA is the address *after* the saved return |
michael@0 | 599 | // address. |
michael@0 | 600 | // |
michael@0 | 601 | // But by definition, the CFA remains constant throughout the |
michael@0 | 602 | // lifetime of the frame. This makes it a useful value for other |
michael@0 | 603 | // columns to refer to. It also gives debuggers a useful handle |
michael@0 | 604 | // for identifying a frame. |
michael@0 | 605 | // |
michael@0 | 606 | // If you look at the table above, you'll notice that a given entry is |
michael@0 | 607 | // often the same as the one immediately above it: most instructions |
michael@0 | 608 | // change only one or two aspects of the stack frame, if they affect |
michael@0 | 609 | // it at all. The DWARF format takes advantage of this fact, and |
michael@0 | 610 | // reduces the size of the data by mentioning only the addresses and |
michael@0 | 611 | // columns at which changes take place. So for the above, DWARF CFI |
michael@0 | 612 | // data would only actually mention the following: |
michael@0 | 613 | // |
michael@0 | 614 | //     insn      cfa    r0      r1 ...  ra |
michael@0 | 615 | //     ======================================= |
michael@0 | 616 | //     func+0:   sp                     cfa[0] |
michael@0 | 617 | //     func+1:   sp+16 |
michael@0 | 618 | //     func+2:          cfa[-4] |
michael@0 | 619 | //     func+11:  sp+20 |
michael@0 | 620 | //     func+21:         r0 |
michael@0 | 621 | //     func+22:  sp |
michael@0 | 622 | // |
michael@0 | 623 | // In fact, this is the way the parser reports CFI to the consumer: as |
michael@0 | 624 | // a series of statements of the form, "At address X, column Y changed |
michael@0 | 625 | // to Z," and related conventions for describing the initial state. |
michael@0 | 626 | // |
michael@0 | 627 | // Naturally, it would be impractical to have to scan the entire |
michael@0 | 628 | // program's CFI, noting changes as we go, just to recover the |
michael@0 | 629 | // unwinding rules in effect at one particular instruction. To avoid |
michael@0 | 630 | // this, CFI data is grouped into "entries", each of which covers a |
michael@0 | 631 | // specified range of addresses and begins with a complete statement |
michael@0 | 632 | // of the rules for all recoverable registers at that starting |
michael@0 | 633 | // address. Each entry typically covers a single function. |
michael@0 | 634 | // |
michael@0 | 635 | // Thus, to compute the contents of a given row of the table --- that |
michael@0 | 636 | // is, rules for recovering the CFA, RA, and registers at a given |
michael@0 | 637 | // instruction --- the consumer should find the entry that covers that |
michael@0 | 638 | // instruction's address, start with the initial state supplied at the |
michael@0 | 639 | // beginning of the entry, and work forward until it has processed all |
michael@0 | 640 | // the changes up to and including those for the present instruction. |
michael@0 | 641 | // |
michael@0 | 642 | // There are seven kinds of rules that can appear in an entry of the |
michael@0 | 643 | // table: |
michael@0 | 644 | // |
michael@0 | 645 | // - "undefined": The given register is not preserved by the callee; |
michael@0 | 646 | // its value cannot be recovered. |
michael@0 | 647 | // |
michael@0 | 648 | // - "same value": This register has the same value it did in the callee. |
michael@0 | 649 | // |
michael@0 | 650 | // - offset(N): The register is saved at offset N from the CFA. |
michael@0 | 651 | // |
michael@0 | 652 | // - val_offset(N): The value the register had in the caller is the |
michael@0 | 653 | // CFA plus offset N. (This is usually only useful for describing |
michael@0 | 654 | // the stack pointer.) |
michael@0 | 655 | // |
michael@0 | 656 | // - register(R): The register's value was saved in another register R. |
michael@0 | 657 | // |
michael@0 | 658 | // - expression(E): Evaluating the DWARF expression E using the |
michael@0 | 659 | // current frame's registers' values yields the address at which the |
michael@0 | 660 | // register was saved. |
michael@0 | 661 | // |
michael@0 | 662 | // - val_expression(E): Evaluating the DWARF expression E using the |
michael@0 | 663 | // current frame's registers' values yields the value the register |
michael@0 | 664 | // had in the caller. |
michael@0 | 665 | |
michael@0 | 666 | class CallFrameInfo { |
michael@0 | 667 | public: |
michael@0 | 668 | // The different kinds of entries one finds in CFI. Used internally, |
michael@0 | 669 | // and for error reporting. |
michael@0 | 670 | enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; |
michael@0 | 671 | |
michael@0 | 672 | // The handler class to which the parser hands the parsed call frame |
michael@0 | 673 | // information. Defined below. |
michael@0 | 674 | class Handler; |
michael@0 | 675 | |
michael@0 | 676 | // A reporter class, which CallFrameInfo uses to report errors |
michael@0 | 677 | // encountered while parsing call frame information. Defined below. |
michael@0 | 678 | class Reporter; |
michael@0 | 679 | |
michael@0 | 680 | // Create a DWARF CFI parser. BUFFER points to the contents of the |
michael@0 | 681 | // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. |
michael@0 | 682 | // REPORTER is an error reporter the parser should use to report |
michael@0 | 683 | // problems. READER is a ByteReader instance that has the endianness and |
michael@0 | 684 | // address size set properly. Report the data we find to HANDLER. |
michael@0 | 685 | // |
michael@0 | 686 | // This class can also parse Linux C++ exception handling data, as found |
michael@0 | 687 | // in '.eh_frame' sections. This data is a variant of DWARF CFI that is |
michael@0 | 688 | // placed in loadable segments so that it is present in the program's |
michael@0 | 689 | // address space, and is interpreted by the C++ runtime to search the |
michael@0 | 690 | // call stack for a handler interested in the exception being thrown, |
michael@0 | 691 | // actually pop the frames, and find cleanup code to run. |
michael@0 | 692 | // |
michael@0 | 693 | // There are two differences between the call frame information described |
michael@0 | 694 | // in the DWARF standard and the exception handling data Linux places in |
michael@0 | 695 | // the .eh_frame section: |
michael@0 | 696 | // |
michael@0 | 697 | // - Exception handling data uses a different format for call frame |
michael@0 | 698 | // information entry headers. The distinguished CIE id, the way FDEs |
michael@0 | 699 | // refer to their CIEs, and the way the end of the series of entries is |
michael@0 | 700 | // determined are all slightly different. |
michael@0 | 701 | // |
michael@0 | 702 | // If the constructor's EH_FRAME argument is true, then the |
michael@0 | 703 | // CallFrameInfo parses the entry headers as Linux C++ exception |
michael@0 | 704 | // handling data. If EH_FRAME is false or omitted, the CallFrameInfo |
michael@0 | 705 | // parses standard DWARF call frame information. |
michael@0 | 706 | // |
michael@0 | 707 | // - Linux C++ exception handling data uses CIE augmentation strings |
michael@0 | 708 | // beginning with 'z' to specify the presence of additional data after |
michael@0 | 709 | // the CIE and FDE headers and special encodings used for addresses in |
michael@0 | 710 | // frame description entries. |
michael@0 | 711 | // |
michael@0 | 712 | // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or |
michael@0 | 713 | // exception handling data if you have supplied READER with the base |
michael@0 | 714 | // addresses needed to interpret the pointer encodings that 'z' |
michael@0 | 715 | // augmentations can specify. See the ByteReader interface for details |
michael@0 | 716 | // about the base addresses. See the CallFrameInfo::Handler interface |
michael@0 | 717 | // for details about the additional information one might find in |
michael@0 | 718 | // 'z'-augmented data. |
michael@0 | 719 | // |
michael@0 | 720 | // Thus: |
michael@0 | 721 | // |
michael@0 | 722 | // - If you are parsing standard DWARF CFI, as found in a .debug_frame |
michael@0 | 723 | // section, you should pass false for the EH_FRAME argument, or omit |
michael@0 | 724 | // it, and you need not worry about providing READER with the |
michael@0 | 725 | // additional base addresses. |
michael@0 | 726 | // |
michael@0 | 727 | // - If you want to parse Linux C++ exception handling data from a |
michael@0 | 728 | // .eh_frame section, you should pass EH_FRAME as true, and call |
michael@0 | 729 | // READER's Set*Base member functions before calling our Start method. |
michael@0 | 730 | // |
michael@0 | 731 | // - If you want to parse DWARF CFI that uses the 'z' augmentations |
michael@0 | 732 | // (although I don't think any toolchain ever emits such data), you |
michael@0 | 733 | // could pass false for EH_FRAME, but call READER's Set*Base members. |
michael@0 | 734 | // |
michael@0 | 735 | // The extensions the Linux C++ ABI makes to DWARF for exception |
michael@0 | 736 | // handling are described here, rather poorly: |
michael@0 | 737 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html |
michael@0 | 738 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html |
michael@0 | 739 | // |
michael@0 | 740 | // The mechanics of C++ exception handling, personality routines, |
michael@0 | 741 | // and language-specific data areas are described here, rather nicely: |
michael@0 | 742 | // http://www.codesourcery.com/public/cxx-abi/abi-eh.html |
michael@0 | 743 | |
michael@0 | 744 | CallFrameInfo(const char *buffer, size_t buffer_length, |
michael@0 | 745 | ByteReader *reader, Handler *handler, Reporter *reporter, |
michael@0 | 746 | bool eh_frame = false) |
michael@0 | 747 | : buffer_(buffer), buffer_length_(buffer_length), |
michael@0 | 748 | reader_(reader), handler_(handler), reporter_(reporter), |
michael@0 | 749 | eh_frame_(eh_frame) { } |
michael@0 | 750 | |
michael@0 | 751 | ~CallFrameInfo() { } |
michael@0 | 752 | |
michael@0 | 753 | // Parse the entries in BUFFER, reporting what we find to HANDLER. |
michael@0 | 754 | // Return true if we reach the end of the section successfully, or |
michael@0 | 755 | // false if we encounter an error. |
michael@0 | 756 | bool Start(); |
michael@0 | 757 | |
michael@0 | 758 | // Return the textual name of KIND. For error reporting. |
michael@0 | 759 | static const char *KindName(EntryKind kind); |
michael@0 | 760 | |
michael@0 | 761 | private: |
michael@0 | 762 | |
michael@0 | 763 | struct CIE; |
michael@0 | 764 | |
michael@0 | 765 | // A CFI entry, either an FDE or a CIE. |
michael@0 | 766 | struct Entry { |
michael@0 | 767 | // The starting offset of the entry in the section, for error |
michael@0 | 768 | // reporting. |
michael@0 | 769 | size_t offset; |
michael@0 | 770 | |
michael@0 | 771 | // The start of this entry in the buffer. |
michael@0 | 772 | const char *start; |
michael@0 | 773 | |
michael@0 | 774 | // Which kind of entry this is. |
michael@0 | 775 | // |
michael@0 | 776 | // We want to be able to use this for error reporting even while we're |
michael@0 | 777 | // in the midst of parsing. Error reporting code may assume that kind, |
michael@0 | 778 | // offset, and start fields are valid, although kind may be kUnknown. |
michael@0 | 779 | EntryKind kind; |
michael@0 | 780 | |
michael@0 | 781 | // The end of this entry's common prologue (initial length and id), and |
michael@0 | 782 | // the start of this entry's kind-specific fields. |
michael@0 | 783 | const char *fields; |
michael@0 | 784 | |
michael@0 | 785 | // The start of this entry's instructions. |
michael@0 | 786 | const char *instructions; |
michael@0 | 787 | |
michael@0 | 788 | // The address past the entry's last byte in the buffer. (Note that |
michael@0 | 789 | // since offset points to the entry's initial length field, and the |
michael@0 | 790 | // length field is the number of bytes after that field, this is not |
michael@0 | 791 | // simply buffer_ + offset + length.) |
michael@0 | 792 | const char *end; |
michael@0 | 793 | |
michael@0 | 794 | // For both DWARF CFI and .eh_frame sections, this is the CIE id in a |
michael@0 | 795 | // CIE, and the offset of the associated CIE in an FDE. |
michael@0 | 796 | uint64 id; |
michael@0 | 797 | |
michael@0 | 798 | // The CIE that applies to this entry, if we've parsed it. If this is a |
michael@0 | 799 | // CIE, then this field points to this structure. |
michael@0 | 800 | CIE *cie; |
michael@0 | 801 | }; |
michael@0 | 802 | |
michael@0 | 803 | // A common information entry (CIE). |
michael@0 | 804 | struct CIE: public Entry { |
michael@0 | 805 | uint8 version; // CFI data version number |
michael@0 | 806 | std::string augmentation; // vendor format extension markers |
michael@0 | 807 | uint64 code_alignment_factor; // scale for code address adjustments |
michael@0 | 808 | int data_alignment_factor; // scale for stack pointer adjustments |
michael@0 | 809 | unsigned return_address_register; // which register holds the return addr |
michael@0 | 810 | |
michael@0 | 811 | // True if this CIE includes Linux C++ ABI 'z' augmentation data. |
michael@0 | 812 | bool has_z_augmentation; |
michael@0 | 813 | |
michael@0 | 814 | // Parsed 'z' augmentation data. These are meaningful only if |
michael@0 | 815 | // has_z_augmentation is true. |
michael@0 | 816 | bool has_z_lsda; // The 'z' augmentation included 'L'. |
michael@0 | 817 | bool has_z_personality; // The 'z' augmentation included 'P'. |
michael@0 | 818 | bool has_z_signal_frame; // The 'z' augmentation included 'S'. |
michael@0 | 819 | |
michael@0 | 820 | // If has_z_lsda is true, this is the encoding to be used for language- |
michael@0 | 821 | // specific data area pointers in FDEs. |
michael@0 | 822 | DwarfPointerEncoding lsda_encoding; |
michael@0 | 823 | |
michael@0 | 824 | // If has_z_personality is true, this is the encoding used for the |
michael@0 | 825 | // personality routine pointer in the augmentation data. |
michael@0 | 826 | DwarfPointerEncoding personality_encoding; |
michael@0 | 827 | |
michael@0 | 828 | // If has_z_personality is true, this is the address of the personality |
michael@0 | 829 | // routine --- or, if personality_encoding & DW_EH_PE_indirect, the |
michael@0 | 830 | // address where the personality routine's address is stored. |
michael@0 | 831 | uint64 personality_address; |
michael@0 | 832 | |
michael@0 | 833 | // This is the encoding used for addresses in the FDE header and |
michael@0 | 834 | // in DW_CFA_set_loc instructions. This is always valid, whether |
michael@0 | 835 | // or not we saw a 'z' augmentation string; its default value is |
michael@0 | 836 | // DW_EH_PE_absptr, which is what normal DWARF CFI uses. |
michael@0 | 837 | DwarfPointerEncoding pointer_encoding; |
michael@0 | 838 | }; |
michael@0 | 839 | |
michael@0 | 840 | // A frame description entry (FDE). |
michael@0 | 841 | struct FDE: public Entry { |
michael@0 | 842 | uint64 address; // start address of described code |
michael@0 | 843 | uint64 size; // size of described code, in bytes |
michael@0 | 844 | |
michael@0 | 845 | // If cie->has_z_lsda is true, then this is the language-specific data |
michael@0 | 846 | // area's address --- or its address's address, if cie->lsda_encoding |
michael@0 | 847 | // has the DW_EH_PE_indirect bit set. |
michael@0 | 848 | uint64 lsda_address; |
michael@0 | 849 | }; |
michael@0 | 850 | |
michael@0 | 851 | // Internal use. |
michael@0 | 852 | class Rule; |
michael@0 | 853 | class UndefinedRule; |
michael@0 | 854 | class SameValueRule; |
michael@0 | 855 | class OffsetRule; |
michael@0 | 856 | class ValOffsetRule; |
michael@0 | 857 | class RegisterRule; |
michael@0 | 858 | class ExpressionRule; |
michael@0 | 859 | class ValExpressionRule; |
michael@0 | 860 | class RuleMap; |
michael@0 | 861 | class State; |
michael@0 | 862 | |
michael@0 | 863 | // Parse the initial length and id of a CFI entry, either a CIE, an FDE, |
michael@0 | 864 | // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the |
michael@0 | 865 | // data to parse. On success, populate ENTRY as appropriate, and return |
michael@0 | 866 | // true. On failure, report the problem, and return false. Even if we |
michael@0 | 867 | // return false, set ENTRY->end to the first byte after the entry if we |
michael@0 | 868 | // were able to figure that out, or NULL if we weren't. |
michael@0 | 869 | bool ReadEntryPrologue(const char *cursor, Entry *entry); |
michael@0 | 870 | |
michael@0 | 871 | // Parse the fields of a CIE after the entry prologue, including any 'z' |
michael@0 | 872 | // augmentation data. Assume that the 'Entry' fields of CIE are |
michael@0 | 873 | // populated; use CIE->fields and CIE->end as the start and limit for |
michael@0 | 874 | // parsing. On success, populate the rest of *CIE, and return true; on |
michael@0 | 875 | // failure, report the problem and return false. |
michael@0 | 876 | bool ReadCIEFields(CIE *cie); |
michael@0 | 877 | |
michael@0 | 878 | // Parse the fields of an FDE after the entry prologue, including any 'z' |
michael@0 | 879 | // augmentation data. Assume that the 'Entry' fields of *FDE are |
michael@0 | 880 | // initialized; use FDE->fields and FDE->end as the start and limit for |
michael@0 | 881 | // parsing. Assume that FDE->cie is fully initialized. On success, |
michael@0 | 882 | // populate the rest of *FDE, and return true; on failure, report the |
michael@0 | 883 | // problem and return false. |
michael@0 | 884 | bool ReadFDEFields(FDE *fde); |
michael@0 | 885 | |
michael@0 | 886 | // Report that ENTRY is incomplete, and return false. This is just a |
michael@0 | 887 | // trivial wrapper for invoking reporter_->Incomplete; it provides a |
michael@0 | 888 | // little brevity. |
michael@0 | 889 | bool ReportIncomplete(Entry *entry); |
michael@0 | 890 | |
michael@0 | 891 | // Return true if ENCODING has the DW_EH_PE_indirect bit set. |
michael@0 | 892 | static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { |
michael@0 | 893 | return encoding & DW_EH_PE_indirect; |
michael@0 | 894 | } |
michael@0 | 895 | |
michael@0 | 896 | // The contents of the .debug_frame or .eh_frame section we're parsing. |
michael@0 | 897 | const char *buffer_; |
michael@0 | 898 | size_t buffer_length_; |
michael@0 | 899 | |
michael@0 | 900 | // For reading multi-byte values with the appropriate endianness. |
michael@0 | 901 | ByteReader *reader_; |
michael@0 | 902 | |
michael@0 | 903 | // The handler to which we should report the data we find. |
michael@0 | 904 | Handler *handler_; |
michael@0 | 905 | |
michael@0 | 906 | // For reporting problems in the info we're parsing. |
michael@0 | 907 | Reporter *reporter_; |
michael@0 | 908 | |
michael@0 | 909 | // True if we are processing .eh_frame-format data. |
michael@0 | 910 | bool eh_frame_; |
michael@0 | 911 | }; |
michael@0 | 912 | |
michael@0 | 913 | |
michael@0 | 914 | // The handler class for CallFrameInfo. The CFI parser calls the |
michael@0 | 915 | // member functions of a handler object to report the data it finds. |
michael@0 | 916 | class CallFrameInfo::Handler { |
michael@0 | 917 | public: |
michael@0 | 918 | // The pseudo-register number for the canonical frame address. |
michael@0 | 919 | enum { kCFARegister = DW_REG_CFA }; |
michael@0 | 920 | |
michael@0 | 921 | Handler() { } |
michael@0 | 922 | virtual ~Handler() { } |
michael@0 | 923 | |
michael@0 | 924 | // The parser has found CFI for the machine code at ADDRESS, |
michael@0 | 925 | // extending for LENGTH bytes. OFFSET is the offset of the frame |
michael@0 | 926 | // description entry in the section, for use in error messages. |
michael@0 | 927 | // VERSION is the version number of the CFI format. AUGMENTATION is |
michael@0 | 928 | // a string describing any producer-specific extensions present in |
michael@0 | 929 | // the data. RETURN_ADDRESS is the number of the register that holds |
michael@0 | 930 | // the address to which the function should return. |
michael@0 | 931 | // |
michael@0 | 932 | // Entry should return true to process this CFI, or false to skip to |
michael@0 | 933 | // the next entry. |
michael@0 | 934 | // |
michael@0 | 935 | // The parser invokes Entry for each Frame Description Entry (FDE) |
michael@0 | 936 | // it finds. The parser doesn't report Common Information Entries |
michael@0 | 937 | // to the handler explicitly; instead, if the handler elects to |
michael@0 | 938 | // process a given FDE, the parser reiterates the appropriate CIE's |
michael@0 | 939 | // contents at the beginning of the FDE's rules. |
michael@0 | 940 | virtual bool Entry(size_t offset, uint64 address, uint64 length, |
michael@0 | 941 | uint8 version, const std::string &augmentation, |
michael@0 | 942 | unsigned return_address) = 0; |
michael@0 | 943 | |
michael@0 | 944 | // When the Entry function returns true, the parser calls these |
michael@0 | 945 | // handler functions repeatedly to describe the rules for recovering |
michael@0 | 946 | // registers at each instruction in the given range of machine code. |
michael@0 | 947 | // Immediately after a call to Entry, the handler should assume that |
michael@0 | 948 | // the rule for each callee-saves register is "unchanged" --- that |
michael@0 | 949 | // is, that the register still has the value it had in the caller. |
michael@0 | 950 | // |
michael@0 | 951 | // If a *Rule function returns true, we continue processing this entry's |
michael@0 | 952 | // instructions. If a *Rule function returns false, we stop evaluating |
michael@0 | 953 | // instructions, and skip to the next entry. Either way, we call End |
michael@0 | 954 | // before going on to the next entry. |
michael@0 | 955 | // |
michael@0 | 956 | // In all of these functions, if the REG parameter is kCFARegister, then |
michael@0 | 957 | // the rule describes how to find the canonical frame address. |
michael@0 | 958 | // kCFARegister may be passed as a BASE_REGISTER argument, meaning that |
michael@0 | 959 | // the canonical frame address should be used as the base address for the |
michael@0 | 960 | // computation. All other REG values will be positive. |
michael@0 | 961 | |
michael@0 | 962 | // At ADDRESS, register REG's value is not recoverable. |
michael@0 | 963 | virtual bool UndefinedRule(uint64 address, int reg) = 0; |
michael@0 | 964 | |
michael@0 | 965 | // At ADDRESS, register REG's value is the same as that it had in |
michael@0 | 966 | // the caller. |
michael@0 | 967 | virtual bool SameValueRule(uint64 address, int reg) = 0; |
michael@0 | 968 | |
michael@0 | 969 | // At ADDRESS, register REG has been saved at offset OFFSET from |
michael@0 | 970 | // BASE_REGISTER. |
michael@0 | 971 | virtual bool OffsetRule(uint64 address, int reg, |
michael@0 | 972 | int base_register, long offset) = 0; |
michael@0 | 973 | |
michael@0 | 974 | // At ADDRESS, the caller's value of register REG is the current |
michael@0 | 975 | // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an |
michael@0 | 976 | // address at which the register's value is saved.) |
michael@0 | 977 | virtual bool ValOffsetRule(uint64 address, int reg, |
michael@0 | 978 | int base_register, long offset) = 0; |
michael@0 | 979 | |
michael@0 | 980 | // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs |
michael@0 | 981 | // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that |
michael@0 | 982 | // BASE_REGISTER is the "home" for REG's saved value: if you want to |
michael@0 | 983 | // assign to a variable whose home is REG in the calling frame, you |
michael@0 | 984 | // should put the value in BASE_REGISTER. |
michael@0 | 985 | virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; |
michael@0 | 986 | |
michael@0 | 987 | // At ADDRESS, the DWARF expression EXPRESSION yields the address at |
michael@0 | 988 | // which REG was saved. |
michael@0 | 989 | virtual bool ExpressionRule(uint64 address, int reg, |
michael@0 | 990 | const std::string &expression) = 0; |
michael@0 | 991 | |
michael@0 | 992 | // At ADDRESS, the DWARF expression EXPRESSION yields the caller's |
michael@0 | 993 | // value for REG. (This rule doesn't provide an address at which the |
michael@0 | 994 | // register's value is saved.) |
michael@0 | 995 | virtual bool ValExpressionRule(uint64 address, int reg, |
michael@0 | 996 | const std::string &expression) = 0; |
michael@0 | 997 | |
michael@0 | 998 | // Indicate that the rules for the address range reported by the |
michael@0 | 999 | // last call to Entry are complete. End should return true if |
michael@0 | 1000 | // everything is okay, or false if an error has occurred and parsing |
michael@0 | 1001 | // should stop. |
michael@0 | 1002 | virtual bool End() = 0; |
michael@0 | 1003 | |
michael@0 | 1004 | // Handler functions for Linux C++ exception handling data. These are |
michael@0 | 1005 | // only called if the data includes 'z' augmentation strings. |
michael@0 | 1006 | |
michael@0 | 1007 | // The Linux C++ ABI uses an extension of the DWARF CFI format to |
michael@0 | 1008 | // walk the stack to propagate exceptions from the throw to the |
michael@0 | 1009 | // appropriate catch, and do the appropriate cleanups along the way. |
michael@0 | 1010 | // CFI entries used for exception handling have two additional pieces of |
michael@0 | 1011 | // data associated with them: |
michael@0 | 1012 | // |
michael@0 | 1013 | // - The "language-specific data area" describes which exception |
michael@0 | 1014 | // types the function has 'catch' clauses for, and indicates how |
michael@0 | 1015 | // to go about re-entering the function at the appropriate catch |
michael@0 | 1016 | // clause. If the exception is not caught, it describes the |
michael@0 | 1017 | // destructors that must run before the frame is popped. |
michael@0 | 1018 | // |
michael@0 | 1019 | // - The "personality routine" is responsible for interpreting the |
michael@0 | 1020 | // language-specific data area's contents, and deciding whether |
michael@0 | 1021 | // the exception should continue to propagate down the stack, |
michael@0 | 1022 | // perhaps after doing some cleanup for this frame, or whether the |
michael@0 | 1023 | // exception will be caught here. |
michael@0 | 1024 | // |
michael@0 | 1025 | // In principle, the language-specific data area is opaque to |
michael@0 | 1026 | // everybody but the personality routine. In practice, these values |
michael@0 | 1027 | // may be useful or interesting to readers with extra context, and |
michael@0 | 1028 | // we have to at least skip them anyway, so we might as well report |
michael@0 | 1029 | // them to the handler. |
michael@0 | 1030 | |
michael@0 | 1031 | // This entry's exception handling personality routine's address is |
michael@0 | 1032 | // ADDRESS. If INDIRECT is true, then ADDRESS is the address at |
michael@0 | 1033 | // which the routine's address is stored. The default definition for |
michael@0 | 1034 | // this handler function simply returns true, allowing parsing of |
michael@0 | 1035 | // the entry to continue. |
michael@0 | 1036 | virtual bool PersonalityRoutine(uint64 address, bool indirect) { |
michael@0 | 1037 | return true; |
michael@0 | 1038 | } |
michael@0 | 1039 | |
michael@0 | 1040 | // This entry's language-specific data area (LSDA) is located at |
michael@0 | 1041 | // ADDRESS. If INDIRECT is true, then ADDRESS is the address at |
michael@0 | 1042 | // which the area's address is stored. The default definition for |
michael@0 | 1043 | // this handler function simply returns true, allowing parsing of |
michael@0 | 1044 | // the entry to continue. |
michael@0 | 1045 | virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { |
michael@0 | 1046 | return true; |
michael@0 | 1047 | } |
michael@0 | 1048 | |
michael@0 | 1049 | // This entry describes a signal trampoline --- this frame is the |
michael@0 | 1050 | // caller of a signal handler. The default definition for this |
michael@0 | 1051 | // handler function simply returns true, allowing parsing of the |
michael@0 | 1052 | // entry to continue. |
michael@0 | 1053 | // |
michael@0 | 1054 | // The best description of the rationale for and meaning of signal |
michael@0 | 1055 | // trampoline CFI entries seems to be in the GCC bug database: |
michael@0 | 1056 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 |
michael@0 | 1057 | virtual bool SignalHandler() { return true; } |
michael@0 | 1058 | }; |
michael@0 | 1059 | |
michael@0 | 1060 | |
michael@0 | 1061 | // The CallFrameInfo class makes calls on an instance of this class to |
michael@0 | 1062 | // report errors or warn about problems in the data it is parsing. |
michael@0 | 1063 | // These messages are sent to the message sink |aLog| provided to the |
michael@0 | 1064 | // constructor. |
michael@0 | 1065 | class CallFrameInfo::Reporter { |
michael@0 | 1066 | public: |
michael@0 | 1067 | // Create an error reporter which attributes troubles to the section |
michael@0 | 1068 | // named SECTION in FILENAME. |
michael@0 | 1069 | // |
michael@0 | 1070 | // Normally SECTION would be .debug_frame, but the Mac puts CFI data |
michael@0 | 1071 | // in a Mach-O section named __debug_frame. If we support |
michael@0 | 1072 | // Linux-style exception handling data, we could be reading an |
michael@0 | 1073 | // .eh_frame section. |
michael@0 | 1074 | Reporter(void (*aLog)(const char*), |
michael@0 | 1075 | const std::string &filename, |
michael@0 | 1076 | const std::string §ion = ".debug_frame") |
michael@0 | 1077 | : log_(aLog), filename_(filename), section_(section) { } |
michael@0 | 1078 | virtual ~Reporter() { } |
michael@0 | 1079 | |
michael@0 | 1080 | // The CFI entry at OFFSET ends too early to be well-formed. KIND |
michael@0 | 1081 | // indicates what kind of entry it is; KIND can be kUnknown if we |
michael@0 | 1082 | // haven't parsed enough of the entry to tell yet. |
michael@0 | 1083 | virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); |
michael@0 | 1084 | |
michael@0 | 1085 | // The .eh_frame data has a four-byte zero at OFFSET where the next |
michael@0 | 1086 | // entry's length would be; this is a terminator. However, the buffer |
michael@0 | 1087 | // length as given to the CallFrameInfo constructor says there should be |
michael@0 | 1088 | // more data. |
michael@0 | 1089 | virtual void EarlyEHTerminator(uint64 offset); |
michael@0 | 1090 | |
michael@0 | 1091 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the |
michael@0 | 1092 | // section is not that large. |
michael@0 | 1093 | virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); |
michael@0 | 1094 | |
michael@0 | 1095 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry |
michael@0 | 1096 | // there is not a CIE. |
michael@0 | 1097 | virtual void BadCIEId(uint64 offset, uint64 cie_offset); |
michael@0 | 1098 | |
michael@0 | 1099 | // The FDE at OFFSET refers to a CIE with version number VERSION, |
michael@0 | 1100 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
michael@0 | 1101 | // a version number we don't recognize. |
michael@0 | 1102 | virtual void UnrecognizedVersion(uint64 offset, int version); |
michael@0 | 1103 | |
michael@0 | 1104 | // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, |
michael@0 | 1105 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
michael@0 | 1106 | // augmentations we don't recognize. |
michael@0 | 1107 | virtual void UnrecognizedAugmentation(uint64 offset, |
michael@0 | 1108 | const std::string &augmentation); |
michael@0 | 1109 | |
michael@0 | 1110 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not |
michael@0 | 1111 | // a valid encoding. |
michael@0 | 1112 | virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); |
michael@0 | 1113 | |
michael@0 | 1114 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends |
michael@0 | 1115 | // on a base address which has not been supplied. |
michael@0 | 1116 | virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); |
michael@0 | 1117 | |
michael@0 | 1118 | // The CIE at OFFSET contains a DW_CFA_restore instruction at |
michael@0 | 1119 | // INSN_OFFSET, which may not appear in a CIE. |
michael@0 | 1120 | virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); |
michael@0 | 1121 | |
michael@0 | 1122 | // The entry at OFFSET, of kind KIND, has an unrecognized |
michael@0 | 1123 | // instruction at INSN_OFFSET. |
michael@0 | 1124 | virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1125 | uint64 insn_offset); |
michael@0 | 1126 | |
michael@0 | 1127 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
michael@0 | 1128 | // KIND, establishes a rule that cites the CFA, but we have not |
michael@0 | 1129 | // established a CFA rule yet. |
michael@0 | 1130 | virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1131 | uint64 insn_offset); |
michael@0 | 1132 | |
michael@0 | 1133 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
michael@0 | 1134 | // KIND, is a DW_CFA_restore_state instruction, but the stack of |
michael@0 | 1135 | // saved states is empty. |
michael@0 | 1136 | virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1137 | uint64 insn_offset); |
michael@0 | 1138 | |
michael@0 | 1139 | // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry |
michael@0 | 1140 | // at OFFSET, of kind KIND, would restore a state that has no CFA |
michael@0 | 1141 | // rule, whereas the current state does have a CFA rule. This is |
michael@0 | 1142 | // bogus input, which the CallFrameInfo::Handler interface doesn't |
michael@0 | 1143 | // (and shouldn't) have any way to report. |
michael@0 | 1144 | virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, |
michael@0 | 1145 | uint64 insn_offset); |
michael@0 | 1146 | |
michael@0 | 1147 | private: |
michael@0 | 1148 | // A logging sink function, as supplied by LUL's user. |
michael@0 | 1149 | void (*log_)(const char*); |
michael@0 | 1150 | |
michael@0 | 1151 | protected: |
michael@0 | 1152 | // The name of the file whose CFI we're reading. |
michael@0 | 1153 | std::string filename_; |
michael@0 | 1154 | |
michael@0 | 1155 | // The name of the CFI section in that file. |
michael@0 | 1156 | std::string section_; |
michael@0 | 1157 | }; |
michael@0 | 1158 | |
michael@0 | 1159 | |
michael@0 | 1160 | using lul::CallFrameInfo; |
michael@0 | 1161 | using lul::Summariser; |
michael@0 | 1162 | |
michael@0 | 1163 | // A class that accepts parsed call frame information from the DWARF
michael@0 | 1164 | // CFI parser and hands the resulting entries to a lul::Summariser,
michael@0 | 1165 | // reporting problems it encounters through its Reporter.
michael@0 | 1166 | class DwarfCFIToModule: public CallFrameInfo::Handler { |
michael@0 | 1167 | public: |
michael@0 | 1168 | |
michael@0 | 1169 | // DwarfCFIToModule uses an instance of this class to report errors |
michael@0 | 1170 | // detected while converting DWARF CFI to Breakpad STACK CFI records. |
michael@0 | 1171 | class Reporter { |
michael@0 | 1172 | public: |
michael@0 | 1173 | // Create a reporter that writes messages to the message sink |
michael@0 | 1174 | // |aLog|. FILE is the name of the file we're processing, and |
michael@0 | 1175 | // SECTION is the name of the section within that file that we're |
michael@0 | 1176 | // looking at (.debug_frame, .eh_frame, etc.). |
michael@0 | 1177 | Reporter(void (*aLog)(const char*), |
michael@0 | 1178 | const std::string &file, const std::string §ion) |
michael@0 | 1179 | : log_(aLog), file_(file), section_(section) { } |
michael@0 | 1180 | virtual ~Reporter() { } |
michael@0 | 1181 | |
michael@0 | 1182 | // The DWARF CFI entry at OFFSET says that REG is undefined, but the |
michael@0 | 1183 | // Breakpad symbol file format cannot express this. |
michael@0 | 1184 | virtual void UndefinedNotSupported(size_t offset, |
michael@0 | 1185 | const UniqueString* reg); |
michael@0 | 1186 | |
michael@0 | 1187 | // The DWARF CFI entry at OFFSET says that REG uses a DWARF |
michael@0 | 1188 | // expression to find its value, but DwarfCFIToModule is not |
michael@0 | 1189 | // capable of translating DWARF expressions to Breakpad postfix |
michael@0 | 1190 | // expressions. |
michael@0 | 1191 | virtual void ExpressionsNotSupported(size_t offset, |
michael@0 | 1192 | const UniqueString* reg); |
michael@0 | 1193 | |
michael@0 | 1194 | private: |
michael@0 | 1195 | // A logging sink function, as supplied by LUL's user. |
michael@0 | 1196 | void (*log_)(const char*); |
michael@0 | 1197 | protected: |
michael@0 | 1198 | std::string file_, section_; |
michael@0 | 1199 | }; |
michael@0 | 1200 | |
michael@0 | 1201 | // Register name tables. If TABLE is a vector returned by one of these |
michael@0 | 1202 | // functions, then TABLE[R] is the name of the register numbered R in |
michael@0 | 1203 | // DWARF call frame information. |
michael@0 | 1204 | class RegisterNames { |
michael@0 | 1205 | public: |
michael@0 | 1206 | // Intel's "x86" or IA-32. |
michael@0 | 1207 | static const unsigned int I386(); |
michael@0 | 1208 | |
michael@0 | 1209 | // AMD x86_64, AMD64, Intel EM64T, or Intel 64 |
michael@0 | 1210 | static const unsigned int X86_64(); |
michael@0 | 1211 | |
michael@0 | 1212 | // ARM. |
michael@0 | 1213 | static const unsigned int ARM(); |
michael@0 | 1214 | }; |
michael@0 | 1215 | |
michael@0 | 1216 | // Create a handler for the dwarf2reader::CallFrameInfo parser that |
michael@0 | 1217 | // records the stack unwinding information it receives in SUMM. |
michael@0 | 1218 | // |
michael@0 | 1219 | // Use REGISTER_NAMES[I] as the name of register number I; *this |
michael@0 | 1220 | // keeps a reference to the vector, so the vector should remain |
michael@0 | 1221 | // alive for as long as the DwarfCFIToModule does. |
michael@0 | 1222 | // |
michael@0 | 1223 | // Use REPORTER for reporting problems encountered in the conversion |
michael@0 | 1224 | // process. |
michael@0 | 1225 | DwarfCFIToModule(const unsigned int num_dw_regs, |
michael@0 | 1226 | Reporter *reporter, |
michael@0 | 1227 | /*OUT*/Summariser* summ) |
michael@0 | 1228 | : summ_(summ), num_dw_regs_(num_dw_regs), reporter_(reporter), |
michael@0 | 1229 | return_address_(-1) { |
michael@0 | 1230 | } |
michael@0 | 1231 | virtual ~DwarfCFIToModule() {} |
michael@0 | 1232 | |
michael@0 | 1233 | virtual bool Entry(size_t offset, uint64 address, uint64 length, |
michael@0 | 1234 | uint8 version, const std::string &augmentation, |
michael@0 | 1235 | unsigned return_address); |
michael@0 | 1236 | virtual bool UndefinedRule(uint64 address, int reg); |
michael@0 | 1237 | virtual bool SameValueRule(uint64 address, int reg); |
michael@0 | 1238 | virtual bool OffsetRule(uint64 address, int reg, |
michael@0 | 1239 | int base_register, long offset); |
michael@0 | 1240 | virtual bool ValOffsetRule(uint64 address, int reg, |
michael@0 | 1241 | int base_register, long offset); |
michael@0 | 1242 | virtual bool RegisterRule(uint64 address, int reg, int base_register); |
michael@0 | 1243 | virtual bool ExpressionRule(uint64 address, int reg, |
michael@0 | 1244 | const std::string &expression); |
michael@0 | 1245 | virtual bool ValExpressionRule(uint64 address, int reg, |
michael@0 | 1246 | const std::string &expression); |
michael@0 | 1247 | virtual bool End(); |
michael@0 | 1248 | |
michael@0 | 1249 | private: |
michael@0 | 1250 | // Return the name to use for register REG. |
michael@0 | 1251 | const UniqueString* RegisterName(int i); |
michael@0 | 1252 | |
michael@0 | 1253 | // The Summariser to which we should give entries |
michael@0 | 1254 | Summariser* summ_; |
michael@0 | 1255 | |
michael@0 | 1256 | // The number of Dwarf-defined register names for this architecture. |
michael@0 | 1257 | const unsigned int num_dw_regs_; |
michael@0 | 1258 | |
michael@0 | 1259 | // The reporter to use to report problems. |
michael@0 | 1260 | Reporter *reporter_; |
michael@0 | 1261 | |
michael@0 | 1262 | // The section offset of the current frame description entry, for |
michael@0 | 1263 | // use in error messages. |
michael@0 | 1264 | size_t entry_offset_; |
michael@0 | 1265 | |
michael@0 | 1266 | // The return address column for that entry. |
michael@0 | 1267 | unsigned return_address_; |
michael@0 | 1268 | }; |
michael@0 | 1269 | |
michael@0 | 1270 | } // namespace lul |
michael@0 | 1271 | |
michael@0 | 1272 | #endif // LulDwarfExt_h |