1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/dwarf/bytereader.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,310 @@ 1.4 +// -*- mode: C++ -*- 1.5 + 1.6 +// Copyright (c) 2010 Google Inc. All Rights Reserved. 1.7 +// 1.8 +// Redistribution and use in source and binary forms, with or without 1.9 +// modification, are permitted provided that the following conditions are 1.10 +// met: 1.11 +// 1.12 +// * Redistributions of source code must retain the above copyright 1.13 +// notice, this list of conditions and the following disclaimer. 1.14 +// * Redistributions in binary form must reproduce the above 1.15 +// copyright notice, this list of conditions and the following disclaimer 1.16 +// in the documentation and/or other materials provided with the 1.17 +// distribution. 1.18 +// * Neither the name of Google Inc. nor the names of its 1.19 +// contributors may be used to endorse or promote products derived from 1.20 +// this software without specific prior written permission. 1.21 +// 1.22 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.23 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.24 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.25 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.26 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.27 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.28 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.29 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.30 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.31 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.32 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
1.33 + 1.34 +#ifndef COMMON_DWARF_BYTEREADER_H__ 1.35 +#define COMMON_DWARF_BYTEREADER_H__ 1.36 + 1.37 +#include <string> 1.38 +#include "common/dwarf/types.h" 1.39 +#include "common/dwarf/dwarf2enums.h" 1.40 + 1.41 +namespace dwarf2reader { 1.42 + 1.43 +// We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN 1.44 +// because they conflict with macros 1.45 +enum Endianness { 1.46 + ENDIANNESS_BIG, 1.47 + ENDIANNESS_LITTLE 1.48 +}; 1.49 + 1.50 +// A ByteReader knows how to read single- and multi-byte values of 1.51 +// various endiannesses, sizes, and encodings, as used in DWARF 1.52 +// debugging information and Linux C++ exception handling data. 1.53 +class ByteReader { 1.54 + public: 1.55 + // Construct a ByteReader capable of reading one-, two-, four-, and 1.56 + // eight-byte values according to ENDIANNESS, absolute machine-sized 1.57 + // addresses, DWARF-style "initial length" values, signed and 1.58 + // unsigned LEB128 numbers, and Linux C++ exception handling data's 1.59 + // encoded pointers. 1.60 + explicit ByteReader(enum Endianness endianness); 1.61 + virtual ~ByteReader(); 1.62 + 1.63 + // Read a single byte from BUFFER and return it as an unsigned 8 bit 1.64 + // number. 1.65 + uint8 ReadOneByte(const char* buffer) const; 1.66 + 1.67 + // Read two bytes from BUFFER and return them as an unsigned 16 bit 1.68 + // number, using this ByteReader's endianness. 1.69 + uint16 ReadTwoBytes(const char* buffer) const; 1.70 + 1.71 + // Read four bytes from BUFFER and return them as an unsigned 32 bit 1.72 + // number, using this ByteReader's endianness. This function returns 1.73 + // a uint64 so that it is compatible with ReadAddress and 1.74 + // ReadOffset. The number it returns will never be outside the range 1.75 + // of an unsigned 32 bit integer. 1.76 + uint64 ReadFourBytes(const char* buffer) const; 1.77 + 1.78 + // Read eight bytes from BUFFER and return them as an unsigned 64 1.79 + // bit number, using this ByteReader's endianness. 
1.80 + uint64 ReadEightBytes(const char* buffer) const; 1.81 + 1.82 + // Read an unsigned LEB128 (Little Endian Base 128) number from 1.83 + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to 1.84 + // the number of bytes read. 1.85 + // 1.86 + // The unsigned LEB128 representation of an integer N is a variable 1.87 + // number of bytes: 1.88 + // 1.89 + // - If N is between 0 and 0x7f, then its unsigned LEB128 1.90 + // representation is a single byte whose value is N. 1.91 + // 1.92 + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | 1.93 + // 0x80, followed by the unsigned LEB128 representation of N / 1.94 + // 128, rounded towards negative infinity. 1.95 + // 1.96 + // In other words, we break VALUE into groups of seven bits, put 1.97 + // them in little-endian order, and then write them as eight-bit 1.98 + // bytes with the high bit on all but the last. 1.99 + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; 1.100 + 1.101 + // Read a signed LEB128 number from BUFFER and return it as a 1.102 + // signed 64 bit integer. Set LEN to the number of bytes read. 1.103 + // 1.104 + // The signed LEB128 representation of an integer N is a variable 1.105 + // number of bytes: 1.106 + // 1.107 + // - If N is between -0x40 and 0x3f, then its signed LEB128 1.108 + // representation is a single byte whose value is N in two's 1.109 + // complement. 1.110 + // 1.111 + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | 1.112 + // 0x80, followed by the signed LEB128 representation of N / 128, 1.113 + // rounded towards negative infinity. 1.114 + // 1.115 + // In other words, we break VALUE into groups of seven bits, put 1.116 + // them in little-endian order, and then write them as eight-bit 1.117 + // bytes with the high bit on all but the last. 1.118 + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; 1.119 + 1.120 + // Indicate that addresses on this architecture are SIZE bytes long. 
SIZE 1.121 + // must be either 4 or 8. (DWARF allows addresses to be any number of 1.122 + // bytes in length from 1 to 255, but we only support 32- and 64-bit 1.123 + // addresses at the moment.) You must call this before using the 1.124 + // ReadAddress member function. 1.125 + // 1.126 + // For data in a .debug_info section, or something that .debug_info 1.127 + // refers to like line number or macro data, the compilation unit 1.128 + // header's address_size field indicates the address size to use. Call 1.129 + // frame information doesn't indicate its address size (a shortcoming of 1.130 + // the spec); you must supply the appropriate size based on the 1.131 + // architecture of the target machine. 1.132 + void SetAddressSize(uint8 size); 1.133 + 1.134 + // Return the current address size, in bytes. This is either 4, 1.135 + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. 1.136 + uint8 AddressSize() const { return address_size_; } 1.137 + 1.138 + // Read an address from BUFFER and return it as an unsigned 64 bit 1.139 + // integer, respecting this ByteReader's endianness and address size. You 1.140 + // must call SetAddressSize before calling this function. 1.141 + uint64 ReadAddress(const char* buffer) const; 1.142 + 1.143 + // DWARF actually defines two slightly different formats: 32-bit DWARF 1.144 + // and 64-bit DWARF. This is *not* related to the size of registers or 1.145 + // addresses on the target machine; it refers only to the size of section 1.146 + // offsets and data lengths appearing in the DWARF data. One only needs 1.147 + // 64-bit DWARF when the debugging data itself is larger than 4GiB. 1.148 + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the 1.149 + // debugging data itself is very large. 
1.150 + // 1.151 + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each 1.152 + // compilation unit and call frame information entry begins with an 1.153 + // "initial length" field, which, in addition to giving the length of the 1.154 + // data, also indicates the size of section offsets and lengths appearing 1.155 + // in that data. The ReadInitialLength member function, below, reads an 1.156 + // initial length and sets the ByteReader's offset size as a side effect. 1.157 + // Thus, in the normal process of reading DWARF data, the appropriate 1.158 + // offset size is set automatically. So, you should only need to call 1.159 + // SetOffsetSize if you are using the same ByteReader to jump from the 1.160 + // midst of one block of DWARF data into another. 1.161 + 1.162 + // Read a DWARF "initial length" field from START, and return it as 1.163 + // an unsigned 64 bit integer, respecting this ByteReader's 1.164 + // endianness. Set *LEN to the length of the initial length in 1.165 + // bytes, either four or twelve. As a side effect, set this 1.166 + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF 1.167 + // initial length) or 8 (if we see a 64-bit DWARF initial length). 1.168 + // 1.169 + // A DWARF initial length is either: 1.170 + // 1.171 + // - a byte count stored as an unsigned 32-bit value less than 1.172 + // 0xffffff00, indicating that the data whose length is being 1.173 + // measured uses the 32-bit DWARF format, or 1.174 + // 1.175 + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, 1.176 + // indicating that the data whose length is being measured uses 1.177 + // the 64-bit DWARF format. 1.178 + uint64 ReadInitialLength(const char* start, size_t* len); 1.179 + 1.180 + // Read an offset from BUFFER and return it as an unsigned 64 bit 1.181 + // integer, respecting the ByteReader's endianness. 
In 32-bit DWARF, the 1.182 + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes 1.183 + // long. You must call ReadInitialLength or SetOffsetSize before calling 1.184 + // this function; see the comments above for details. 1.185 + uint64 ReadOffset(const char* buffer) const; 1.186 + 1.187 + // Return the current offset size, in bytes. 1.188 + // A return value of 4 indicates that we are reading 32-bit DWARF. 1.189 + // A return value of 8 indicates that we are reading 64-bit DWARF. 1.190 + uint8 OffsetSize() const { return offset_size_; } 1.191 + 1.192 + // Indicate that section offsets and lengths are SIZE bytes long. SIZE 1.193 + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). 1.194 + // Usually, you should not call this function yourself; instead, let a 1.195 + // call to ReadInitialLength establish the data's offset size 1.196 + // automatically. 1.197 + void SetOffsetSize(uint8 size); 1.198 + 1.199 + // The Linux C++ ABI uses a variant of DWARF call frame information 1.200 + // for exception handling. This data is included in the program's 1.201 + // address space as the ".eh_frame" section, and interpreted at 1.202 + // runtime to walk the stack, find exception handlers, and run 1.203 + // cleanup code. The format is mostly the same as DWARF CFI, with 1.204 + // some adjustments made to provide the additional 1.205 + // exception-handling data, and to make the data easier to work with 1.206 + // in memory --- for example, to allow it to be placed in read-only 1.207 + // memory even when describing position-independent code. 1.208 + // 1.209 + // In particular, exception handling data can select a number of 1.210 + // different encodings for pointers that appear in the data, as 1.211 + // described by the DwarfPointerEncoding enum. There are actually 1.212 + // four axes(!) to the encoding: 1.213 + // 1.214 + // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use 1.215 + // the DWARF LEB128 encoding. 
1.216 + // 1.217 + // - The pointer's signedness: pointers can be signed or unsigned. 1.218 + // 1.219 + // - The pointer's base address: the data stored in the exception 1.220 + // handling data can be the actual address (that is, an absolute 1.221 + // pointer), or relative to one of a number of different base 1.222 + // addresses --- including that of the encoded pointer itself, for 1.223 + // a form of "pc-relative" addressing. 1.224 + // 1.225 + // - The pointer may be indirect: it may be the address where the 1.226 + // true pointer is stored. (This is used to refer to things via 1.227 + // global offset table entries, program linkage table entries, or 1.228 + // other tricks used in position-independent code.) 1.229 + // 1.230 + // There are also two options that fall outside that matrix 1.231 + // altogether: the pointer may be omitted, or it may have padding to 1.232 + // align it on an appropriate address boundary. (That last option 1.233 + // may seem like it should be just another axis, but it is not.) 1.234 + 1.235 + // Indicate that the exception handling data is loaded starting at 1.236 + // SECTION_BASE, and that the start of its buffer in our own memory 1.237 + // is BUFFER_BASE. This allows us to find the address that a given 1.238 + // byte in our buffer would have when loaded into the program the 1.239 + // data describes. We need this to resolve DW_EH_PE_pcrel pointers. 1.240 + void SetCFIDataBase(uint64 section_base, const char *buffer_base); 1.241 + 1.242 + // Indicate that the base address of the program's ".text" section 1.243 + // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. 1.244 + void SetTextBase(uint64 text_base); 1.245 + 1.246 + // Indicate that the base address for DW_EH_PE_datarel pointers is 1.247 + // DATA_BASE. The proper value depends on the ABI; it is usually the 1.248 + // address of the global offset table, held in a designated register in 1.249 + // position-independent code. 
You will need to look at the startup code 1.250 + // for the target system to be sure. I tried; my eyes bled. 1.251 + void SetDataBase(uint64 data_base); 1.252 + 1.253 + // Indicate that the base address for the FDE we are processing is 1.254 + // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel 1.255 + // pointers. (This encoding does not seem to be used by the GNU 1.256 + // toolchain.) 1.257 + void SetFunctionBase(uint64 function_base); 1.258 + 1.259 + // Indicate that we are no longer processing any FDE, so any use of 1.260 + // a DW_EH_PE_funcrel encoding is an error. 1.261 + void ClearFunctionBase(); 1.262 + 1.263 + // Return true if ENCODING is a valid pointer encoding. 1.264 + bool ValidEncoding(DwarfPointerEncoding encoding) const; 1.265 + 1.266 + // Return true if we have all the information we need to read a 1.267 + // pointer that uses ENCODING. This checks that the appropriate 1.268 + // SetFooBase function for ENCODING has been called. 1.269 + bool UsableEncoding(DwarfPointerEncoding encoding) const; 1.270 + 1.271 + // Read an encoded pointer from BUFFER using ENCODING; return the 1.272 + // absolute address it represents, and set *LEN to the pointer's 1.273 + // length in bytes, including any padding for aligned pointers. 1.274 + // 1.275 + // This function calls 'abort' if ENCODING is invalid or refers to a 1.276 + // base address this reader hasn't been given, so you should check 1.277 + // with ValidEncoding and UsableEncoding first if you would rather 1.278 + // die in a more helpful way. 1.279 + uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, 1.280 + size_t *len) const; 1.281 + 1.282 + private: 1.283 + 1.284 + // Function pointer type for our address and offset readers. 1.285 + typedef uint64 (ByteReader::*AddressReader)(const char*) const; 1.286 + 1.287 + // Read an offset from BUFFER and return it as an unsigned 64 bit 1.288 + // integer. 
DWARF2/3 define offsets as either 4 or 8 bytes, 1.289 + // generally depending on the amount of DWARF2/3 info present. 1.290 + // This function pointer gets set by SetOffsetSize. 1.291 + AddressReader offset_reader_; 1.292 + 1.293 + // Read an address from BUFFER and return it as an unsigned 64 bit 1.294 + // integer. DWARF2/3 allow addresses to be any size from 0-255 1.295 + // bytes currently. Internally we support 4 and 8 byte addresses, 1.296 + // and will CHECK on anything else. 1.297 + // This function pointer gets set by SetAddressSize. 1.298 + AddressReader address_reader_; 1.299 + 1.300 + Endianness endian_; 1.301 + uint8 address_size_; 1.302 + uint8 offset_size_; 1.303 + 1.304 + // Base addresses for Linux C++ exception handling data's encoded pointers. 1.305 + bool have_section_base_, have_text_base_, have_data_base_; 1.306 + bool have_function_base_; 1.307 + uint64 section_base_, text_base_, data_base_, function_base_; 1.308 + const char *buffer_base_; 1.309 +}; 1.310 + 1.311 +} // namespace dwarf2reader 1.312 + 1.313 +#endif // COMMON_DWARF_BYTEREADER_H__