1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/profiler/LulDwarfExt.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1272 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 + 1.7 +// Copyright 2006, 2010 Google Inc. All Rights Reserved. 1.8 +// 1.9 +// Redistribution and use in source and binary forms, with or without 1.10 +// modification, are permitted provided that the following conditions are 1.11 +// met: 1.12 +// 1.13 +// * Redistributions of source code must retain the above copyright 1.14 +// notice, this list of conditions and the following disclaimer. 1.15 +// * Redistributions in binary form must reproduce the above 1.16 +// copyright notice, this list of conditions and the following disclaimer 1.17 +// in the documentation and/or other materials provided with the 1.18 +// distribution. 1.19 +// * Neither the name of Google Inc. nor the names of its 1.20 +// contributors may be used to endorse or promote products derived from 1.21 +// this software without specific prior written permission. 1.22 +// 1.23 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.24 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.25 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.26 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.27 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.28 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.29 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.30 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.31 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.32 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.33 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
1.34 + 1.35 +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.36 + 1.37 +// This file is derived from the following files in 1.38 +// toolkit/crashreporter/google-breakpad: 1.39 +// src/common/dwarf/types.h 1.40 +// src/common/dwarf/dwarf2enums.h 1.41 +// src/common/dwarf/bytereader.h 1.42 +// src/common/dwarf_cfi_to_module.h 1.43 +// src/common/dwarf/dwarf2reader.h 1.44 + 1.45 +#ifndef LulDwarfExt_h 1.46 +#define LulDwarfExt_h 1.47 + 1.48 +#include <stdint.h> 1.49 + 1.50 +#include "mozilla/Assertions.h" 1.51 + 1.52 +#include "LulDwarfSummariser.h" 1.53 + 1.54 +typedef signed char int8; 1.55 +typedef short int16; 1.56 +typedef int int32; 1.57 +typedef long long int64; 1.58 + 1.59 +typedef unsigned char uint8; 1.60 +typedef unsigned short uint16; 1.61 +typedef unsigned int uint32; 1.62 +typedef unsigned long long uint64; 1.63 + 1.64 +#ifdef __PTRDIFF_TYPE__ 1.65 +typedef __PTRDIFF_TYPE__ intptr; 1.66 +typedef unsigned __PTRDIFF_TYPE__ uintptr; 1.67 +#else 1.68 +#error "Can't find pointer-sized integral types." 1.69 +#endif 1.70 + 1.71 + 1.72 +namespace lul { 1.73 + 1.74 +// Exception handling frame description pointer formats, as described 1.75 +// by the Linux Standard Base Core Specification 4.0, section 11.5, 1.76 +// DWARF Extensions. 1.77 +enum DwarfPointerEncoding 1.78 + { 1.79 + DW_EH_PE_absptr = 0x00, 1.80 + DW_EH_PE_omit = 0xff, 1.81 + DW_EH_PE_uleb128 = 0x01, 1.82 + DW_EH_PE_udata2 = 0x02, 1.83 + DW_EH_PE_udata4 = 0x03, 1.84 + DW_EH_PE_udata8 = 0x04, 1.85 + DW_EH_PE_sleb128 = 0x09, 1.86 + DW_EH_PE_sdata2 = 0x0A, 1.87 + DW_EH_PE_sdata4 = 0x0B, 1.88 + DW_EH_PE_sdata8 = 0x0C, 1.89 + DW_EH_PE_pcrel = 0x10, 1.90 + DW_EH_PE_textrel = 0x20, 1.91 + DW_EH_PE_datarel = 0x30, 1.92 + DW_EH_PE_funcrel = 0x40, 1.93 + DW_EH_PE_aligned = 0x50, 1.94 + 1.95 + // The GNU toolchain sources define this enum value as well, 1.96 + // simply to help classify the lower nybble values into signed and 1.97 + // unsigned groups. 
1.98 + DW_EH_PE_signed = 0x08, 1.99 + 1.100 + // This is not documented in LSB 4.0, but it is used in both the 1.101 + // Linux and OS X toolchains. It can be added to any other 1.102 + // encoding (except DW_EH_PE_aligned), and indicates that the 1.103 + // encoded value represents the address at which the true address 1.104 + // is stored, not the true address itself. 1.105 + DW_EH_PE_indirect = 0x80 1.106 + }; 1.107 + 1.108 + 1.109 +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN 1.110 +// because it conflicts with a macro 1.111 +enum Endianness { 1.112 + ENDIANNESS_BIG, 1.113 + ENDIANNESS_LITTLE 1.114 +}; 1.115 + 1.116 +// A ByteReader knows how to read single- and multi-byte values of 1.117 +// various endiannesses, sizes, and encodings, as used in DWARF 1.118 +// debugging information and Linux C++ exception handling data. 1.119 +class ByteReader { 1.120 + public: 1.121 + // Construct a ByteReader capable of reading one-, two-, four-, and 1.122 + // eight-byte values according to ENDIANNESS, absolute machine-sized 1.123 + // addresses, DWARF-style "initial length" values, signed and 1.124 + // unsigned LEB128 numbers, and Linux C++ exception handling data's 1.125 + // encoded pointers. 1.126 + explicit ByteReader(enum Endianness endianness); 1.127 + virtual ~ByteReader(); 1.128 + 1.129 + // Read a single byte from BUFFER and return it as an unsigned 8 bit 1.130 + // number. 1.131 + uint8 ReadOneByte(const char* buffer) const; 1.132 + 1.133 + // Read two bytes from BUFFER and return them as an unsigned 16 bit 1.134 + // number, using this ByteReader's endianness. 1.135 + uint16 ReadTwoBytes(const char* buffer) const; 1.136 + 1.137 + // Read four bytes from BUFFER and return them as an unsigned 32 bit 1.138 + // number, using this ByteReader's endianness. This function returns 1.139 + // a uint64 so that it is compatible with ReadAddress and 1.140 + // ReadOffset. 
The number it returns will never be outside the range 1.141 + // of an unsigned 32 bit integer. 1.142 + uint64 ReadFourBytes(const char* buffer) const; 1.143 + 1.144 + // Read eight bytes from BUFFER and return them as an unsigned 64 1.145 + // bit number, using this ByteReader's endianness. 1.146 + uint64 ReadEightBytes(const char* buffer) const; 1.147 + 1.148 + // Read an unsigned LEB128 (Little Endian Base 128) number from 1.149 + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to 1.150 + // the number of bytes read. 1.151 + // 1.152 + // The unsigned LEB128 representation of an integer N is a variable 1.153 + // number of bytes: 1.154 + // 1.155 + // - If N is between 0 and 0x7f, then its unsigned LEB128 1.156 + // representation is a single byte whose value is N. 1.157 + // 1.158 + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | 1.159 + // 0x80, followed by the unsigned LEB128 representation of N / 1.160 + // 128, rounded towards negative infinity. 1.161 + // 1.162 + // In other words, we break VALUE into groups of seven bits, put 1.163 + // them in little-endian order, and then write them as eight-bit 1.164 + // bytes with the high bit on all but the last. 1.165 + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; 1.166 + 1.167 + // Read a signed LEB128 number from BUFFER and return it as an 1.168 + // signed 64 bit integer. Set LEN to the number of bytes read. 1.169 + // 1.170 + // The signed LEB128 representation of an integer N is a variable 1.171 + // number of bytes: 1.172 + // 1.173 + // - If N is between -0x40 and 0x3f, then its signed LEB128 1.174 + // representation is a single byte whose value is N in two's 1.175 + // complement. 1.176 + // 1.177 + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | 1.178 + // 0x80, followed by the signed LEB128 representation of N / 128, 1.179 + // rounded towards negative infinity. 
1.180 + // 1.181 + // In other words, we break VALUE into groups of seven bits, put 1.182 + // them in little-endian order, and then write them as eight-bit 1.183 + // bytes with the high bit on all but the last. 1.184 + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; 1.185 + 1.186 + // Indicate that addresses on this architecture are SIZE bytes long. SIZE 1.187 + // must be either 4 or 8. (DWARF allows addresses to be any number of 1.188 + // bytes in length from 1 to 255, but we only support 32- and 64-bit 1.189 + // addresses at the moment.) You must call this before using the 1.190 + // ReadAddress member function. 1.191 + // 1.192 + // For data in a .debug_info section, or something that .debug_info 1.193 + // refers to like line number or macro data, the compilation unit 1.194 + // header's address_size field indicates the address size to use. Call 1.195 + // frame information doesn't indicate its address size (a shortcoming of 1.196 + // the spec); you must supply the appropriate size based on the 1.197 + // architecture of the target machine. 1.198 + void SetAddressSize(uint8 size); 1.199 + 1.200 + // Return the current address size, in bytes. This is either 4, 1.201 + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. 1.202 + uint8 AddressSize() const { return address_size_; } 1.203 + 1.204 + // Read an address from BUFFER and return it as an unsigned 64 bit 1.205 + // integer, respecting this ByteReader's endianness and address size. You 1.206 + // must call SetAddressSize before calling this function. 1.207 + uint64 ReadAddress(const char* buffer) const; 1.208 + 1.209 + // DWARF actually defines two slightly different formats: 32-bit DWARF 1.210 + // and 64-bit DWARF. This is *not* related to the size of registers or 1.211 + // addresses on the target machine; it refers only to the size of section 1.212 + // offsets and data lengths appearing in the DWARF data. 
One only needs 1.213 + // 64-bit DWARF when the debugging data itself is larger than 4GiB. 1.214 + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the 1.215 + // debugging data itself is very large. 1.216 + // 1.217 + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each 1.218 + // compilation unit and call frame information entry begins with an 1.219 + // "initial length" field, which, in addition to giving the length of the 1.220 + // data, also indicates the size of section offsets and lengths appearing 1.221 + // in that data. The ReadInitialLength member function, below, reads an 1.222 + // initial length and sets the ByteReader's offset size as a side effect. 1.223 + // Thus, in the normal process of reading DWARF data, the appropriate 1.224 + // offset size is set automatically. So, you should only need to call 1.225 + // SetOffsetSize if you are using the same ByteReader to jump from the 1.226 + // midst of one block of DWARF data into another. 1.227 + 1.228 + // Read a DWARF "initial length" field from START, and return it as 1.229 + // an unsigned 64 bit integer, respecting this ByteReader's 1.230 + // endianness. Set *LEN to the length of the initial length in 1.231 + // bytes, either four or twelve. As a side effect, set this 1.232 + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF 1.233 + // initial length) or 8 (if we see a 64-bit DWARF initial length). 1.234 + // 1.235 + // A DWARF initial length is either: 1.236 + // 1.237 + // - a byte count stored as an unsigned 32-bit value less than 1.238 + // 0xffffff00, indicating that the data whose length is being 1.239 + // measured uses the 32-bit DWARF format, or 1.240 + // 1.241 + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, 1.242 + // indicating that the data whose length is being measured uses 1.243 + // the 64-bit DWARF format. 
1.244 + uint64 ReadInitialLength(const char* start, size_t* len); 1.245 + 1.246 + // Read an offset from BUFFER and return it as an unsigned 64 bit 1.247 + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the 1.248 + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes 1.249 + // long. You must call ReadInitialLength or SetOffsetSize before calling 1.250 + // this function; see the comments above for details. 1.251 + uint64 ReadOffset(const char* buffer) const; 1.252 + 1.253 + // Return the current offset size, in bytes. 1.254 + // A return value of 4 indicates that we are reading 32-bit DWARF. 1.255 + // A return value of 8 indicates that we are reading 64-bit DWARF. 1.256 + uint8 OffsetSize() const { return offset_size_; } 1.257 + 1.258 + // Indicate that section offsets and lengths are SIZE bytes long. SIZE 1.259 + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). 1.260 + // Usually, you should not call this function yourself; instead, let a 1.261 + // call to ReadInitialLength establish the data's offset size 1.262 + // automatically. 1.263 + void SetOffsetSize(uint8 size); 1.264 + 1.265 + // The Linux C++ ABI uses a variant of DWARF call frame information 1.266 + // for exception handling. This data is included in the program's 1.267 + // address space as the ".eh_frame" section, and intepreted at 1.268 + // runtime to walk the stack, find exception handlers, and run 1.269 + // cleanup code. The format is mostly the same as DWARF CFI, with 1.270 + // some adjustments made to provide the additional 1.271 + // exception-handling data, and to make the data easier to work with 1.272 + // in memory --- for example, to allow it to be placed in read-only 1.273 + // memory even when describing position-independent code. 
1.274 + // 1.275 + // In particular, exception handling data can select a number of 1.276 + // different encodings for pointers that appear in the data, as 1.277 + // described by the DwarfPointerEncoding enum. There are actually 1.278 + // four axes(!) to the encoding: 1.279 + // 1.280 + // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use 1.281 + // the DWARF LEB128 encoding. 1.282 + // 1.283 + // - The pointer's signedness: pointers can be signed or unsigned. 1.284 + // 1.285 + // - The pointer's base address: the data stored in the exception 1.286 + // handling data can be the actual address (that is, an absolute 1.287 + // pointer), or relative to one of a number of different base 1.288 + // addreses --- including that of the encoded pointer itself, for 1.289 + // a form of "pc-relative" addressing. 1.290 + // 1.291 + // - The pointer may be indirect: it may be the address where the 1.292 + // true pointer is stored. (This is used to refer to things via 1.293 + // global offset table entries, program linkage table entries, or 1.294 + // other tricks used in position-independent code.) 1.295 + // 1.296 + // There are also two options that fall outside that matrix 1.297 + // altogether: the pointer may be omitted, or it may have padding to 1.298 + // align it on an appropriate address boundary. (That last option 1.299 + // may seem like it should be just another axis, but it is not.) 1.300 + 1.301 + // Indicate that the exception handling data is loaded starting at 1.302 + // SECTION_BASE, and that the start of its buffer in our own memory 1.303 + // is BUFFER_BASE. This allows us to find the address that a given 1.304 + // byte in our buffer would have when loaded into the program the 1.305 + // data describes. We need this to resolve DW_EH_PE_pcrel pointers. 1.306 + void SetCFIDataBase(uint64 section_base, const char *buffer_base); 1.307 + 1.308 + // Indicate that the base address of the program's ".text" section 1.309 + // is TEXT_BASE. 
We need this to resolve DW_EH_PE_textrel pointers. 1.310 + void SetTextBase(uint64 text_base); 1.311 + 1.312 + // Indicate that the base address for DW_EH_PE_datarel pointers is 1.313 + // DATA_BASE. The proper value depends on the ABI; it is usually the 1.314 + // address of the global offset table, held in a designated register in 1.315 + // position-independent code. You will need to look at the startup code 1.316 + // for the target system to be sure. I tried; my eyes bled. 1.317 + void SetDataBase(uint64 data_base); 1.318 + 1.319 + // Indicate that the base address for the FDE we are processing is 1.320 + // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel 1.321 + // pointers. (This encoding does not seem to be used by the GNU 1.322 + // toolchain.) 1.323 + void SetFunctionBase(uint64 function_base); 1.324 + 1.325 + // Indicate that we are no longer processing any FDE, so any use of 1.326 + // a DW_EH_PE_funcrel encoding is an error. 1.327 + void ClearFunctionBase(); 1.328 + 1.329 + // Return true if ENCODING is a valid pointer encoding. 1.330 + bool ValidEncoding(DwarfPointerEncoding encoding) const; 1.331 + 1.332 + // Return true if we have all the information we need to read a 1.333 + // pointer that uses ENCODING. This checks that the appropriate 1.334 + // SetFooBase function for ENCODING has been called. 1.335 + bool UsableEncoding(DwarfPointerEncoding encoding) const; 1.336 + 1.337 + // Read an encoded pointer from BUFFER using ENCODING; return the 1.338 + // absolute address it represents, and set *LEN to the pointer's 1.339 + // length in bytes, including any padding for aligned pointers. 1.340 + // 1.341 + // This function calls 'abort' if ENCODING is invalid or refers to a 1.342 + // base address this reader hasn't been given, so you should check 1.343 + // with ValidEncoding and UsableEncoding first if you would rather 1.344 + // die in a more helpful way. 
1.345 + uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, 1.346 + size_t *len) const; 1.347 + 1.348 + private: 1.349 + 1.350 + // Function pointer type for our address and offset readers. 1.351 + typedef uint64 (ByteReader::*AddressReader)(const char*) const; 1.352 + 1.353 + // Read an offset from BUFFER and return it as an unsigned 64 bit 1.354 + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, 1.355 + // generally depending on the amount of DWARF2/3 info present. 1.356 + // This function pointer gets set by SetOffsetSize. 1.357 + AddressReader offset_reader_; 1.358 + 1.359 + // Read an address from BUFFER and return it as an unsigned 64 bit 1.360 + // integer. DWARF2/3 allow addresses to be any size from 0-255 1.361 + // bytes currently. Internally we support 4 and 8 byte addresses, 1.362 + // and will CHECK on anything else. 1.363 + // This function pointer gets set by SetAddressSize. 1.364 + AddressReader address_reader_; 1.365 + 1.366 + Endianness endian_; 1.367 + uint8 address_size_; 1.368 + uint8 offset_size_; 1.369 + 1.370 + // Base addresses for Linux C++ exception handling data's encoded pointers. 
1.371 + bool have_section_base_, have_text_base_, have_data_base_; 1.372 + bool have_function_base_; 1.373 + uint64 section_base_; 1.374 + uint64 text_base_, data_base_, function_base_; 1.375 + const char *buffer_base_; 1.376 +}; 1.377 + 1.378 + 1.379 +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { 1.380 + return buffer[0]; 1.381 +} 1.382 + 1.383 +inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const { 1.384 + const unsigned char *buffer 1.385 + = reinterpret_cast<const unsigned char *>(signed_buffer); 1.386 + const uint16 buffer0 = buffer[0]; 1.387 + const uint16 buffer1 = buffer[1]; 1.388 + if (endian_ == ENDIANNESS_LITTLE) { 1.389 + return buffer0 | buffer1 << 8; 1.390 + } else { 1.391 + return buffer1 | buffer0 << 8; 1.392 + } 1.393 +} 1.394 + 1.395 +inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const { 1.396 + const unsigned char *buffer 1.397 + = reinterpret_cast<const unsigned char *>(signed_buffer); 1.398 + const uint32 buffer0 = buffer[0]; 1.399 + const uint32 buffer1 = buffer[1]; 1.400 + const uint32 buffer2 = buffer[2]; 1.401 + const uint32 buffer3 = buffer[3]; 1.402 + if (endian_ == ENDIANNESS_LITTLE) { 1.403 + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; 1.404 + } else { 1.405 + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; 1.406 + } 1.407 +} 1.408 + 1.409 +inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const { 1.410 + const unsigned char *buffer 1.411 + = reinterpret_cast<const unsigned char *>(signed_buffer); 1.412 + const uint64 buffer0 = buffer[0]; 1.413 + const uint64 buffer1 = buffer[1]; 1.414 + const uint64 buffer2 = buffer[2]; 1.415 + const uint64 buffer3 = buffer[3]; 1.416 + const uint64 buffer4 = buffer[4]; 1.417 + const uint64 buffer5 = buffer[5]; 1.418 + const uint64 buffer6 = buffer[6]; 1.419 + const uint64 buffer7 = buffer[7]; 1.420 + if (endian_ == ENDIANNESS_LITTLE) { 1.421 + return buffer0 | buffer1 << 8 | buffer2 << 16 | 
buffer3 << 24 | 1.422 + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; 1.423 + } else { 1.424 + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | 1.425 + buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; 1.426 + } 1.427 +} 1.428 + 1.429 +// Read an unsigned LEB128 number. Each byte contains 7 bits of 1.430 +// information, plus one bit saying whether the number continues or 1.431 +// not. 1.432 + 1.433 +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, 1.434 + size_t* len) const { 1.435 + uint64 result = 0; 1.436 + size_t num_read = 0; 1.437 + unsigned int shift = 0; 1.438 + unsigned char byte; 1.439 + 1.440 + do { 1.441 + byte = *buffer++; 1.442 + num_read++; 1.443 + 1.444 + result |= (static_cast<uint64>(byte & 0x7f)) << shift; 1.445 + 1.446 + shift += 7; 1.447 + 1.448 + } while (byte & 0x80); 1.449 + 1.450 + *len = num_read; 1.451 + 1.452 + return result; 1.453 +} 1.454 + 1.455 +// Read a signed LEB128 number. These are like regular LEB128 1.456 +// numbers, except the last byte may have a sign bit set. 
1.457 + 1.458 +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, 1.459 + size_t* len) const { 1.460 + int64 result = 0; 1.461 + unsigned int shift = 0; 1.462 + size_t num_read = 0; 1.463 + unsigned char byte; 1.464 + 1.465 + do { 1.466 + byte = *buffer++; 1.467 + num_read++; 1.468 + result |= (static_cast<uint64>(byte & 0x7f) << shift); 1.469 + shift += 7; 1.470 + } while (byte & 0x80); 1.471 + 1.472 + if ((shift < 8 * sizeof (result)) && (byte & 0x40)) 1.473 + result |= -((static_cast<int64>(1)) << shift); 1.474 + *len = num_read; 1.475 + return result; 1.476 +} 1.477 + 1.478 +inline uint64 ByteReader::ReadOffset(const char* buffer) const { 1.479 + MOZ_ASSERT(this->offset_reader_); 1.480 + return (this->*offset_reader_)(buffer); 1.481 +} 1.482 + 1.483 +inline uint64 ByteReader::ReadAddress(const char* buffer) const { 1.484 + MOZ_ASSERT(this->address_reader_); 1.485 + return (this->*address_reader_)(buffer); 1.486 +} 1.487 + 1.488 +inline void ByteReader::SetCFIDataBase(uint64 section_base, 1.489 + const char *buffer_base) { 1.490 + section_base_ = section_base; 1.491 + buffer_base_ = buffer_base; 1.492 + have_section_base_ = true; 1.493 +} 1.494 + 1.495 +inline void ByteReader::SetTextBase(uint64 text_base) { 1.496 + text_base_ = text_base; 1.497 + have_text_base_ = true; 1.498 +} 1.499 + 1.500 +inline void ByteReader::SetDataBase(uint64 data_base) { 1.501 + data_base_ = data_base; 1.502 + have_data_base_ = true; 1.503 +} 1.504 + 1.505 +inline void ByteReader::SetFunctionBase(uint64 function_base) { 1.506 + function_base_ = function_base; 1.507 + have_function_base_ = true; 1.508 +} 1.509 + 1.510 +inline void ByteReader::ClearFunctionBase() { 1.511 + have_function_base_ = false; 1.512 +} 1.513 + 1.514 + 1.515 +// (derived from) 1.516 +// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which 1.517 +// accepts parsed DWARF call frame info and adds it to a Summariser object. 

// This class is a reader for DWARF's Call Frame Information.  CFI
// describes how to unwind stack frames --- even for functions that do
// not follow fixed conventions for saving registers, whose frame size
// varies as they execute, etc.
//
// CFI describes, at each machine instruction, how to compute the
// stack frame's base address, how to find the return address, and
// where to find the saved values of the caller's registers (if the
// callee has stashed them somewhere to free up the registers for its
// own use).
//
// For example, suppose we have a function whose machine code looks
// like this (imagine an assembly language that looks like C, for a
// machine with 32-bit registers, and a stack that grows towards lower
// addresses):
//
// func:                                ; entry point; return address at sp
// func+0:      sp = sp - 16            ; allocate space for stack frame
// func+1:      sp[12] = r0             ; save r0 at sp+12
// ...                                  ; other code, not frame-related
// func+10:     sp -= 4; *sp = x        ; push some x on the stack
// ...                                  ; other code, not frame-related
// func+20:     r0 = sp[16]             ; restore saved r0
// func+21:     sp += 20                ; pop whole stack frame
// func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
//
// DWARF CFI is (a very compressed representation of) a table with a
// row for each machine instruction address and a column for each
// register showing how to restore it, if possible.
//
// A special column named "CFA", for "Canonical Frame Address", tells how
// to compute the base address of the frame; registers' entries may
// refer to the CFA in describing where the registers are saved.
//
// Another special column, named "RA", represents the return address.
//
// For example, here is a complete (uncompressed) table describing the
// function above:
//
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16                  cfa[0]
//     func+2:   sp+16  cfa[-4]         cfa[0]
//     func+11:  sp+20  cfa[-4]         cfa[0]
//     func+21:  sp+20                  cfa[0]
//     func+22:  sp                     cfa[0]
//
// Some things to note here:
//
// - Each row describes the state of affairs *before* executing the
//   instruction at the given address.  Thus, the row for func+0
//   describes the state before we allocate the stack frame.  In the
//   next row, the formula for computing the CFA has changed,
//   reflecting that allocation.
//
// - The other entries are written in terms of the CFA; this allows
//   them to remain unchanged as the stack pointer gets bumped around.
//   For example, the rule for recovering the return address (the "ra"
//   column) remains unchanged throughout the function, even as the
//   stack pointer takes on three different offsets from the return
//   address.
//
// - Although we haven't shown it, most calling conventions designate
//   "callee-saves" and "caller-saves" registers. The callee must
//   preserve the values of callee-saves registers; if it uses them,
//   it must save their original values somewhere, and restore them
//   before it returns. In contrast, the callee is free to trash
//   caller-saves registers; if the callee uses these, it will
//   probably not bother to save them anywhere, and the CFI will
//   probably mark their values as "unrecoverable".
//
//   (However, since the caller cannot assume the callee was going to
//   save them, caller-saves registers are probably dead in the caller
//   anyway, so compilers usually don't generate CFI for caller-saves
//   registers.)
//
// - Exactly where the CFA points is a matter of convention that
//   depends on the architecture and ABI in use. In the example, the
//   CFA is the value the stack pointer had upon entry to the
//   function, pointing at the saved return address. But on the x86,
//   the call frame information generated by GCC follows the
//   convention that the CFA is the address *after* the saved return
//   address.
//
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
//
// If you look at the table above, you'll notice that a given entry is
// often the same as the one immediately above it: most instructions
// change only one or two aspects of the stack frame, if they affect
// it at all. The DWARF format takes advantage of this fact, and
// reduces the size of the data by mentioning only the addresses and
// columns at which changes take place. So for the above, DWARF CFI
// data would only actually mention the following:
//
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16
//     func+2:          cfa[-4]
//     func+11:  sp+20
//     func+21:         r0
//     func+22:  sp
//
// In fact, this is the way the parser reports CFI to the consumer: as
// a series of statements of the form, "At address X, column Y changed
// to Z," and related conventions for describing the initial state.
1.629 +// 1.630 +// Naturally, it would be impractical to have to scan the entire 1.631 +// program's CFI, noting changes as we go, just to recover the 1.632 +// unwinding rules in effect at one particular instruction. To avoid 1.633 +// this, CFI data is grouped into "entries", each of which covers a 1.634 +// specified range of addresses and begins with a complete statement 1.635 +// of the rules for all recoverable registers at that starting 1.636 +// address. Each entry typically covers a single function. 1.637 +// 1.638 +// Thus, to compute the contents of a given row of the table --- that 1.639 +// is, rules for recovering the CFA, RA, and registers at a given 1.640 +// instruction --- the consumer should find the entry that covers that 1.641 +// instruction's address, start with the initial state supplied at the 1.642 +// beginning of the entry, and work forward until it has processed all 1.643 +// the changes up to and including those for the present instruction. 1.644 +// 1.645 +// There are seven kinds of rules that can appear in an entry of the 1.646 +// table: 1.647 +// 1.648 +// - "undefined": The given register is not preserved by the callee; 1.649 +// its value cannot be recovered. 1.650 +// 1.651 +// - "same value": This register has the same value it did in the callee. 1.652 +// 1.653 +// - offset(N): The register is saved at offset N from the CFA. 1.654 +// 1.655 +// - val_offset(N): The value the register had in the caller is the 1.656 +// CFA plus offset N. (This is usually only useful for describing 1.657 +// the stack pointer.) 1.658 +// 1.659 +// - register(R): The register's value was saved in another register R. 1.660 +// 1.661 +// - expression(E): Evaluating the DWARF expression E using the 1.662 +// current frame's registers' values yields the address at which the 1.663 +// register was saved. 
1.664 +// 1.665 +// - val_expression(E): Evaluating the DWARF expression E using the 1.666 +// current frame's registers' values yields the value the register 1.667 +// had in the caller. 1.668 + 1.669 +class CallFrameInfo { 1.670 + public: 1.671 + // The different kinds of entries one finds in CFI. Used internally, 1.672 + // and for error reporting. 1.673 + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; 1.674 + 1.675 + // The handler class to which the parser hands the parsed call frame 1.676 + // information. Defined below. 1.677 + class Handler; 1.678 + 1.679 + // A reporter class, which CallFrameInfo uses to report errors 1.680 + // encountered while parsing call frame information. Defined below. 1.681 + class Reporter; 1.682 + 1.683 + // Create a DWARF CFI parser. BUFFER points to the contents of the 1.684 + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 1.685 + // REPORTER is an error reporter the parser should use to report 1.686 + // problems. READER is a ByteReader instance that has the endianness and 1.687 + // address size set properly. Report the data we find to HANDLER. 1.688 + // 1.689 + // This class can also parse Linux C++ exception handling data, as found 1.690 + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is 1.691 + // placed in loadable segments so that it is present in the program's 1.692 + // address space, and is interpreted by the C++ runtime to search the 1.693 + // call stack for a handler interested in the exception being thrown, 1.694 + // actually pop the frames, and find cleanup code to run. 1.695 + // 1.696 + // There are two differences between the call frame information described 1.697 + // in the DWARF standard and the exception handling data Linux places in 1.698 + // the .eh_frame section: 1.699 + // 1.700 + // - Exception handling data uses uses a different format for call frame 1.701 + // information entry headers. 
The distinguished CIE id, the way FDEs 1.702 + // refer to their CIEs, and the way the end of the series of entries is 1.703 + // determined are all slightly different. 1.704 + // 1.705 + // If the constructor's EH_FRAME argument is true, then the 1.706 + // CallFrameInfo parses the entry headers as Linux C++ exception 1.707 + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo 1.708 + // parses standard DWARF call frame information. 1.709 + // 1.710 + // - Linux C++ exception handling data uses CIE augmentation strings 1.711 + // beginning with 'z' to specify the presence of additional data after 1.712 + // the CIE and FDE headers and special encodings used for addresses in 1.713 + // frame description entries. 1.714 + // 1.715 + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or 1.716 + // exception handling data if you have supplied READER with the base 1.717 + // addresses needed to interpret the pointer encodings that 'z' 1.718 + // augmentations can specify. See the ByteReader interface for details 1.719 + // about the base addresses. See the CallFrameInfo::Handler interface 1.720 + // for details about the additional information one might find in 1.721 + // 'z'-augmented data. 1.722 + // 1.723 + // Thus: 1.724 + // 1.725 + // - If you are parsing standard DWARF CFI, as found in a .debug_frame 1.726 + // section, you should pass false for the EH_FRAME argument, or omit 1.727 + // it, and you need not worry about providing READER with the 1.728 + // additional base addresses. 1.729 + // 1.730 + // - If you want to parse Linux C++ exception handling data from a 1.731 + // .eh_frame section, you should pass EH_FRAME as true, and call 1.732 + // READER's Set*Base member functions before calling our Start method. 
1.733 + // 1.734 + // - If you want to parse DWARF CFI that uses the 'z' augmentations 1.735 + // (although I don't think any toolchain ever emits such data), you 1.736 + // could pass false for EH_FRAME, but call READER's Set*Base members. 1.737 + // 1.738 + // The extensions the Linux C++ ABI makes to DWARF for exception 1.739 + // handling are described here, rather poorly: 1.740 + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html 1.741 + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html 1.742 + // 1.743 + // The mechanics of C++ exception handling, personality routines, 1.744 + // and language-specific data areas are described here, rather nicely: 1.745 + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html 1.746 + 1.747 + CallFrameInfo(const char *buffer, size_t buffer_length, 1.748 + ByteReader *reader, Handler *handler, Reporter *reporter, 1.749 + bool eh_frame = false) 1.750 + : buffer_(buffer), buffer_length_(buffer_length), 1.751 + reader_(reader), handler_(handler), reporter_(reporter), 1.752 + eh_frame_(eh_frame) { } 1.753 + 1.754 + ~CallFrameInfo() { } 1.755 + 1.756 + // Parse the entries in BUFFER, reporting what we find to HANDLER. 1.757 + // Return true if we reach the end of the section successfully, or 1.758 + // false if we encounter an error. 1.759 + bool Start(); 1.760 + 1.761 + // Return the textual name of KIND. For error reporting. 1.762 + static const char *KindName(EntryKind kind); 1.763 + 1.764 + private: 1.765 + 1.766 + struct CIE; 1.767 + 1.768 + // A CFI entry, either an FDE or a CIE. 1.769 + struct Entry { 1.770 + // The starting offset of the entry in the section, for error 1.771 + // reporting. 1.772 + size_t offset; 1.773 + 1.774 + // The start of this entry in the buffer. 1.775 + const char *start; 1.776 + 1.777 + // Which kind of entry this is. 
1.778 + // 1.779 + // We want to be able to use this for error reporting even while we're 1.780 + // in the midst of parsing. Error reporting code may assume that kind, 1.781 + // offset, and start fields are valid, although kind may be kUnknown. 1.782 + EntryKind kind; 1.783 + 1.784 + // The end of this entry's common prologue (initial length and id), and 1.785 + // the start of this entry's kind-specific fields. 1.786 + const char *fields; 1.787 + 1.788 + // The start of this entry's instructions. 1.789 + const char *instructions; 1.790 + 1.791 + // The address past the entry's last byte in the buffer. (Note that 1.792 + // since offset points to the entry's initial length field, and the 1.793 + // length field is the number of bytes after that field, this is not 1.794 + // simply buffer_ + offset + length.) 1.795 + const char *end; 1.796 + 1.797 + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a 1.798 + // CIE, and the offset of the associated CIE in an FDE. 1.799 + uint64 id; 1.800 + 1.801 + // The CIE that applies to this entry, if we've parsed it. If this is a 1.802 + // CIE, then this field points to this structure. 1.803 + CIE *cie; 1.804 + }; 1.805 + 1.806 + // A common information entry (CIE). 1.807 + struct CIE: public Entry { 1.808 + uint8 version; // CFI data version number 1.809 + std::string augmentation; // vendor format extension markers 1.810 + uint64 code_alignment_factor; // scale for code address adjustments 1.811 + int data_alignment_factor; // scale for stack pointer adjustments 1.812 + unsigned return_address_register; // which register holds the return addr 1.813 + 1.814 + // True if this CIE includes Linux C++ ABI 'z' augmentation data. 1.815 + bool has_z_augmentation; 1.816 + 1.817 + // Parsed 'z' augmentation data. These are meaningful only if 1.818 + // has_z_augmentation is true. 1.819 + bool has_z_lsda; // The 'z' augmentation included 'L'. 1.820 + bool has_z_personality; // The 'z' augmentation included 'P'. 
1.821 + bool has_z_signal_frame; // The 'z' augmentation included 'S'. 1.822 + 1.823 + // If has_z_lsda is true, this is the encoding to be used for language- 1.824 + // specific data area pointers in FDEs. 1.825 + DwarfPointerEncoding lsda_encoding; 1.826 + 1.827 + // If has_z_personality is true, this is the encoding used for the 1.828 + // personality routine pointer in the augmentation data. 1.829 + DwarfPointerEncoding personality_encoding; 1.830 + 1.831 + // If has_z_personality is true, this is the address of the personality 1.832 + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the 1.833 + // address where the personality routine's address is stored. 1.834 + uint64 personality_address; 1.835 + 1.836 + // This is the encoding used for addresses in the FDE header and 1.837 + // in DW_CFA_set_loc instructions. This is always valid, whether 1.838 + // or not we saw a 'z' augmentation string; its default value is 1.839 + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 1.840 + DwarfPointerEncoding pointer_encoding; 1.841 + }; 1.842 + 1.843 + // A frame description entry (FDE). 1.844 + struct FDE: public Entry { 1.845 + uint64 address; // start address of described code 1.846 + uint64 size; // size of described code, in bytes 1.847 + 1.848 + // If cie->has_z_lsda is true, then this is the language-specific data 1.849 + // area's address --- or its address's address, if cie->lsda_encoding 1.850 + // has the DW_EH_PE_indirect bit set. 1.851 + uint64 lsda_address; 1.852 + }; 1.853 + 1.854 + // Internal use. 1.855 + class Rule; 1.856 + class UndefinedRule; 1.857 + class SameValueRule; 1.858 + class OffsetRule; 1.859 + class ValOffsetRule; 1.860 + class RegisterRule; 1.861 + class ExpressionRule; 1.862 + class ValExpressionRule; 1.863 + class RuleMap; 1.864 + class State; 1.865 + 1.866 + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, 1.867 + // or a .eh_frame end-of-data mark. 
CURSOR points to the beginning of the 1.868 + // data to parse. On success, populate ENTRY as appropriate, and return 1.869 + // true. On failure, report the problem, and return false. Even if we 1.870 + // return false, set ENTRY->end to the first byte after the entry if we 1.871 + // were able to figure that out, or NULL if we weren't. 1.872 + bool ReadEntryPrologue(const char *cursor, Entry *entry); 1.873 + 1.874 + // Parse the fields of a CIE after the entry prologue, including any 'z' 1.875 + // augmentation data. Assume that the 'Entry' fields of CIE are 1.876 + // populated; use CIE->fields and CIE->end as the start and limit for 1.877 + // parsing. On success, populate the rest of *CIE, and return true; on 1.878 + // failure, report the problem and return false. 1.879 + bool ReadCIEFields(CIE *cie); 1.880 + 1.881 + // Parse the fields of an FDE after the entry prologue, including any 'z' 1.882 + // augmentation data. Assume that the 'Entry' fields of *FDE are 1.883 + // initialized; use FDE->fields and FDE->end as the start and limit for 1.884 + // parsing. Assume that FDE->cie is fully initialized. On success, 1.885 + // populate the rest of *FDE, and return true; on failure, report the 1.886 + // problem and return false. 1.887 + bool ReadFDEFields(FDE *fde); 1.888 + 1.889 + // Report that ENTRY is incomplete, and return false. This is just a 1.890 + // trivial wrapper for invoking reporter_->Incomplete; it provides a 1.891 + // little brevity. 1.892 + bool ReportIncomplete(Entry *entry); 1.893 + 1.894 + // Return true if ENCODING has the DW_EH_PE_indirect bit set. 1.895 + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { 1.896 + return encoding & DW_EH_PE_indirect; 1.897 + } 1.898 + 1.899 + // The contents of the DWARF .debug_info section we're parsing. 1.900 + const char *buffer_; 1.901 + size_t buffer_length_; 1.902 + 1.903 + // For reading multi-byte values with the appropriate endianness. 
1.904 + ByteReader *reader_; 1.905 + 1.906 + // The handler to which we should report the data we find. 1.907 + Handler *handler_; 1.908 + 1.909 + // For reporting problems in the info we're parsing. 1.910 + Reporter *reporter_; 1.911 + 1.912 + // True if we are processing .eh_frame-format data. 1.913 + bool eh_frame_; 1.914 +}; 1.915 + 1.916 + 1.917 +// The handler class for CallFrameInfo. The a CFI parser calls the 1.918 +// member functions of a handler object to report the data it finds. 1.919 +class CallFrameInfo::Handler { 1.920 + public: 1.921 + // The pseudo-register number for the canonical frame address. 1.922 + enum { kCFARegister = DW_REG_CFA }; 1.923 + 1.924 + Handler() { } 1.925 + virtual ~Handler() { } 1.926 + 1.927 + // The parser has found CFI for the machine code at ADDRESS, 1.928 + // extending for LENGTH bytes. OFFSET is the offset of the frame 1.929 + // description entry in the section, for use in error messages. 1.930 + // VERSION is the version number of the CFI format. AUGMENTATION is 1.931 + // a string describing any producer-specific extensions present in 1.932 + // the data. RETURN_ADDRESS is the number of the register that holds 1.933 + // the address to which the function should return. 1.934 + // 1.935 + // Entry should return true to process this CFI, or false to skip to 1.936 + // the next entry. 1.937 + // 1.938 + // The parser invokes Entry for each Frame Description Entry (FDE) 1.939 + // it finds. The parser doesn't report Common Information Entries 1.940 + // to the handler explicitly; instead, if the handler elects to 1.941 + // process a given FDE, the parser reiterates the appropriate CIE's 1.942 + // contents at the beginning of the FDE's rules. 
1.943 + virtual bool Entry(size_t offset, uint64 address, uint64 length, 1.944 + uint8 version, const std::string &augmentation, 1.945 + unsigned return_address) = 0; 1.946 + 1.947 + // When the Entry function returns true, the parser calls these 1.948 + // handler functions repeatedly to describe the rules for recovering 1.949 + // registers at each instruction in the given range of machine code. 1.950 + // Immediately after a call to Entry, the handler should assume that 1.951 + // the rule for each callee-saves register is "unchanged" --- that 1.952 + // is, that the register still has the value it had in the caller. 1.953 + // 1.954 + // If a *Rule function returns true, we continue processing this entry's 1.955 + // instructions. If a *Rule function returns false, we stop evaluating 1.956 + // instructions, and skip to the next entry. Either way, we call End 1.957 + // before going on to the next entry. 1.958 + // 1.959 + // In all of these functions, if the REG parameter is kCFARegister, then 1.960 + // the rule describes how to find the canonical frame address. 1.961 + // kCFARegister may be passed as a BASE_REGISTER argument, meaning that 1.962 + // the canonical frame address should be used as the base address for the 1.963 + // computation. All other REG values will be positive. 1.964 + 1.965 + // At ADDRESS, register REG's value is not recoverable. 1.966 + virtual bool UndefinedRule(uint64 address, int reg) = 0; 1.967 + 1.968 + // At ADDRESS, register REG's value is the same as that it had in 1.969 + // the caller. 1.970 + virtual bool SameValueRule(uint64 address, int reg) = 0; 1.971 + 1.972 + // At ADDRESS, register REG has been saved at offset OFFSET from 1.973 + // BASE_REGISTER. 1.974 + virtual bool OffsetRule(uint64 address, int reg, 1.975 + int base_register, long offset) = 0; 1.976 + 1.977 + // At ADDRESS, the caller's value of register REG is the current 1.978 + // value of BASE_REGISTER plus OFFSET. 
(This rule doesn't provide an 1.979 + // address at which the register's value is saved.) 1.980 + virtual bool ValOffsetRule(uint64 address, int reg, 1.981 + int base_register, long offset) = 0; 1.982 + 1.983 + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs 1.984 + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that 1.985 + // BASE_REGISTER is the "home" for REG's saved value: if you want to 1.986 + // assign to a variable whose home is REG in the calling frame, you 1.987 + // should put the value in BASE_REGISTER. 1.988 + virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; 1.989 + 1.990 + // At ADDRESS, the DWARF expression EXPRESSION yields the address at 1.991 + // which REG was saved. 1.992 + virtual bool ExpressionRule(uint64 address, int reg, 1.993 + const std::string &expression) = 0; 1.994 + 1.995 + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's 1.996 + // value for REG. (This rule doesn't provide an address at which the 1.997 + // register's value is saved.) 1.998 + virtual bool ValExpressionRule(uint64 address, int reg, 1.999 + const std::string &expression) = 0; 1.1000 + 1.1001 + // Indicate that the rules for the address range reported by the 1.1002 + // last call to Entry are complete. End should return true if 1.1003 + // everything is okay, or false if an error has occurred and parsing 1.1004 + // should stop. 1.1005 + virtual bool End() = 0; 1.1006 + 1.1007 + // Handler functions for Linux C++ exception handling data. These are 1.1008 + // only called if the data includes 'z' augmentation strings. 1.1009 + 1.1010 + // The Linux C++ ABI uses an extension of the DWARF CFI format to 1.1011 + // walk the stack to propagate exceptions from the throw to the 1.1012 + // appropriate catch, and do the appropriate cleanups along the way. 
1.1013 + // CFI entries used for exception handling have two additional data 1.1014 + // associated with them: 1.1015 + // 1.1016 + // - The "language-specific data area" describes which exception 1.1017 + // types the function has 'catch' clauses for, and indicates how 1.1018 + // to go about re-entering the function at the appropriate catch 1.1019 + // clause. If the exception is not caught, it describes the 1.1020 + // destructors that must run before the frame is popped. 1.1021 + // 1.1022 + // - The "personality routine" is responsible for interpreting the 1.1023 + // language-specific data area's contents, and deciding whether 1.1024 + // the exception should continue to propagate down the stack, 1.1025 + // perhaps after doing some cleanup for this frame, or whether the 1.1026 + // exception will be caught here. 1.1027 + // 1.1028 + // In principle, the language-specific data area is opaque to 1.1029 + // everybody but the personality routine. In practice, these values 1.1030 + // may be useful or interesting to readers with extra context, and 1.1031 + // we have to at least skip them anyway, so we might as well report 1.1032 + // them to the handler. 1.1033 + 1.1034 + // This entry's exception handling personality routine's address is 1.1035 + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1.1036 + // which the routine's address is stored. The default definition for 1.1037 + // this handler function simply returns true, allowing parsing of 1.1038 + // the entry to continue. 1.1039 + virtual bool PersonalityRoutine(uint64 address, bool indirect) { 1.1040 + return true; 1.1041 + } 1.1042 + 1.1043 + // This entry's language-specific data area (LSDA) is located at 1.1044 + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1.1045 + // which the area's address is stored. The default definition for 1.1046 + // this handler function simply returns true, allowing parsing of 1.1047 + // the entry to continue. 
1.1048 + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { 1.1049 + return true; 1.1050 + } 1.1051 + 1.1052 + // This entry describes a signal trampoline --- this frame is the 1.1053 + // caller of a signal handler. The default definition for this 1.1054 + // handler function simply returns true, allowing parsing of the 1.1055 + // entry to continue. 1.1056 + // 1.1057 + // The best description of the rationale for and meaning of signal 1.1058 + // trampoline CFI entries seems to be in the GCC bug database: 1.1059 + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 1.1060 + virtual bool SignalHandler() { return true; } 1.1061 +}; 1.1062 + 1.1063 + 1.1064 +// The CallFrameInfo class makes calls on an instance of this class to 1.1065 +// report errors or warn about problems in the data it is parsing. 1.1066 +// These messages are sent to the message sink |aLog| provided to the 1.1067 +// constructor. 1.1068 +class CallFrameInfo::Reporter { 1.1069 + public: 1.1070 + // Create an error reporter which attributes troubles to the section 1.1071 + // named SECTION in FILENAME. 1.1072 + // 1.1073 + // Normally SECTION would be .debug_frame, but the Mac puts CFI data 1.1074 + // in a Mach-O section named __debug_frame. If we support 1.1075 + // Linux-style exception handling data, we could be reading an 1.1076 + // .eh_frame section. 1.1077 + Reporter(void (*aLog)(const char*), 1.1078 + const std::string &filename, 1.1079 + const std::string §ion = ".debug_frame") 1.1080 + : log_(aLog), filename_(filename), section_(section) { } 1.1081 + virtual ~Reporter() { } 1.1082 + 1.1083 + // The CFI entry at OFFSET ends too early to be well-formed. KIND 1.1084 + // indicates what kind of entry it is; KIND can be kUnknown if we 1.1085 + // haven't parsed enough of the entry to tell yet. 
1.1086 + virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); 1.1087 + 1.1088 + // The .eh_frame data has a four-byte zero at OFFSET where the next 1.1089 + // entry's length would be; this is a terminator. However, the buffer 1.1090 + // length as given to the CallFrameInfo constructor says there should be 1.1091 + // more data. 1.1092 + virtual void EarlyEHTerminator(uint64 offset); 1.1093 + 1.1094 + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the 1.1095 + // section is not that large. 1.1096 + virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); 1.1097 + 1.1098 + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry 1.1099 + // there is not a CIE. 1.1100 + virtual void BadCIEId(uint64 offset, uint64 cie_offset); 1.1101 + 1.1102 + // The FDE at OFFSET refers to a CIE with version number VERSION, 1.1103 + // which we don't recognize. We cannot parse DWARF CFI if it uses 1.1104 + // a version number we don't recognize. 1.1105 + virtual void UnrecognizedVersion(uint64 offset, int version); 1.1106 + 1.1107 + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, 1.1108 + // which we don't recognize. We cannot parse DWARF CFI if it uses 1.1109 + // augmentations we don't recognize. 1.1110 + virtual void UnrecognizedAugmentation(uint64 offset, 1.1111 + const std::string &augmentation); 1.1112 + 1.1113 + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not 1.1114 + // a valid encoding. 1.1115 + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); 1.1116 + 1.1117 + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends 1.1118 + // on a base address which has not been supplied. 1.1119 + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); 1.1120 + 1.1121 + // The CIE at OFFSET contains a DW_CFA_restore instruction at 1.1122 + // INSN_OFFSET, which may not appear in a CIE. 
1.1123 + virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); 1.1124 + 1.1125 + // The entry at OFFSET, of kind KIND, has an unrecognized 1.1126 + // instruction at INSN_OFFSET. 1.1127 + virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, 1.1128 + uint64 insn_offset); 1.1129 + 1.1130 + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1.1131 + // KIND, establishes a rule that cites the CFA, but we have not 1.1132 + // established a CFA rule yet. 1.1133 + virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, 1.1134 + uint64 insn_offset); 1.1135 + 1.1136 + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1.1137 + // KIND, is a DW_CFA_restore_state instruction, but the stack of 1.1138 + // saved states is empty. 1.1139 + virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, 1.1140 + uint64 insn_offset); 1.1141 + 1.1142 + // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry 1.1143 + // at OFFSET, of kind KIND, would restore a state that has no CFA 1.1144 + // rule, whereas the current state does have a CFA rule. This is 1.1145 + // bogus input, which the CallFrameInfo::Handler interface doesn't 1.1146 + // (and shouldn't) have any way to report. 1.1147 + virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, 1.1148 + uint64 insn_offset); 1.1149 + 1.1150 + private: 1.1151 + // A logging sink function, as supplied by LUL's user. 1.1152 + void (*log_)(const char*); 1.1153 + 1.1154 + protected: 1.1155 + // The name of the file whose CFI we're reading. 1.1156 + std::string filename_; 1.1157 + 1.1158 + // The name of the CFI section in that file. 
1.1159 + std::string section_; 1.1160 +}; 1.1161 + 1.1162 + 1.1163 +using lul::CallFrameInfo; 1.1164 +using lul::Summariser; 1.1165 + 1.1166 +// A class that accepts parsed call frame information from the DWARF 1.1167 +// CFI parser and populates a google_breakpad::Module object with the 1.1168 +// contents. 1.1169 +class DwarfCFIToModule: public CallFrameInfo::Handler { 1.1170 + public: 1.1171 + 1.1172 + // DwarfCFIToModule uses an instance of this class to report errors 1.1173 + // detected while converting DWARF CFI to Breakpad STACK CFI records. 1.1174 + class Reporter { 1.1175 + public: 1.1176 + // Create a reporter that writes messages to the message sink 1.1177 + // |aLog|. FILE is the name of the file we're processing, and 1.1178 + // SECTION is the name of the section within that file that we're 1.1179 + // looking at (.debug_frame, .eh_frame, etc.). 1.1180 + Reporter(void (*aLog)(const char*), 1.1181 + const std::string &file, const std::string §ion) 1.1182 + : log_(aLog), file_(file), section_(section) { } 1.1183 + virtual ~Reporter() { } 1.1184 + 1.1185 + // The DWARF CFI entry at OFFSET says that REG is undefined, but the 1.1186 + // Breakpad symbol file format cannot express this. 1.1187 + virtual void UndefinedNotSupported(size_t offset, 1.1188 + const UniqueString* reg); 1.1189 + 1.1190 + // The DWARF CFI entry at OFFSET says that REG uses a DWARF 1.1191 + // expression to find its value, but DwarfCFIToModule is not 1.1192 + // capable of translating DWARF expressions to Breakpad postfix 1.1193 + // expressions. 1.1194 + virtual void ExpressionsNotSupported(size_t offset, 1.1195 + const UniqueString* reg); 1.1196 + 1.1197 + private: 1.1198 + // A logging sink function, as supplied by LUL's user. 1.1199 + void (*log_)(const char*); 1.1200 + protected: 1.1201 + std::string file_, section_; 1.1202 + }; 1.1203 + 1.1204 + // Register name tables. 
If TABLE is a vector returned by one of these 1.1205 + // functions, then TABLE[R] is the name of the register numbered R in 1.1206 + // DWARF call frame information. 1.1207 + class RegisterNames { 1.1208 + public: 1.1209 + // Intel's "x86" or IA-32. 1.1210 + static const unsigned int I386(); 1.1211 + 1.1212 + // AMD x86_64, AMD64, Intel EM64T, or Intel 64 1.1213 + static const unsigned int X86_64(); 1.1214 + 1.1215 + // ARM. 1.1216 + static const unsigned int ARM(); 1.1217 + }; 1.1218 + 1.1219 + // Create a handler for the dwarf2reader::CallFrameInfo parser that 1.1220 + // records the stack unwinding information it receives in SUMM. 1.1221 + // 1.1222 + // Use REGISTER_NAMES[I] as the name of register number I; *this 1.1223 + // keeps a reference to the vector, so the vector should remain 1.1224 + // alive for as long as the DwarfCFIToModule does. 1.1225 + // 1.1226 + // Use REPORTER for reporting problems encountered in the conversion 1.1227 + // process. 1.1228 + DwarfCFIToModule(const unsigned int num_dw_regs, 1.1229 + Reporter *reporter, 1.1230 + /*OUT*/Summariser* summ) 1.1231 + : summ_(summ), num_dw_regs_(num_dw_regs), reporter_(reporter), 1.1232 + return_address_(-1) { 1.1233 + } 1.1234 + virtual ~DwarfCFIToModule() {} 1.1235 + 1.1236 + virtual bool Entry(size_t offset, uint64 address, uint64 length, 1.1237 + uint8 version, const std::string &augmentation, 1.1238 + unsigned return_address); 1.1239 + virtual bool UndefinedRule(uint64 address, int reg); 1.1240 + virtual bool SameValueRule(uint64 address, int reg); 1.1241 + virtual bool OffsetRule(uint64 address, int reg, 1.1242 + int base_register, long offset); 1.1243 + virtual bool ValOffsetRule(uint64 address, int reg, 1.1244 + int base_register, long offset); 1.1245 + virtual bool RegisterRule(uint64 address, int reg, int base_register); 1.1246 + virtual bool ExpressionRule(uint64 address, int reg, 1.1247 + const std::string &expression); 1.1248 + virtual bool ValExpressionRule(uint64 address, int reg, 
1.1249 + const std::string &expression); 1.1250 + virtual bool End(); 1.1251 + 1.1252 + private: 1.1253 + // Return the name to use for register REG. 1.1254 + const UniqueString* RegisterName(int i); 1.1255 + 1.1256 + // The Summariser to which we should give entries 1.1257 + Summariser* summ_; 1.1258 + 1.1259 + // The number of Dwarf-defined register names for this architecture. 1.1260 + const unsigned int num_dw_regs_; 1.1261 + 1.1262 + // The reporter to use to report problems. 1.1263 + Reporter *reporter_; 1.1264 + 1.1265 + // The section offset of the current frame description entry, for 1.1266 + // use in error messages. 1.1267 + size_t entry_offset_; 1.1268 + 1.1269 + // The return address column for that entry. 1.1270 + unsigned return_address_; 1.1271 +}; 1.1272 + 1.1273 +} // namespace lul 1.1274 + 1.1275 +#endif // LulDwarfExt_h