toolkit/crashreporter/google-breakpad/src/common/dwarf/bytereader.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/dwarf/bytereader.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,310 @@
     1.4 +// -*- mode: C++ -*-
     1.5 +
     1.6 +// Copyright (c) 2010 Google Inc. All Rights Reserved.
     1.7 +//
     1.8 +// Redistribution and use in source and binary forms, with or without
     1.9 +// modification, are permitted provided that the following conditions are
    1.10 +// met:
    1.11 +//
    1.12 +//     * Redistributions of source code must retain the above copyright
    1.13 +// notice, this list of conditions and the following disclaimer.
    1.14 +//     * Redistributions in binary form must reproduce the above
    1.15 +// copyright notice, this list of conditions and the following disclaimer
    1.16 +// in the documentation and/or other materials provided with the
    1.17 +// distribution.
    1.18 +//     * Neither the name of Google Inc. nor the names of its
    1.19 +// contributors may be used to endorse or promote products derived from
    1.20 +// this software without specific prior written permission.
    1.21 +//
    1.22 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.23 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.24 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.25 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    1.26 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    1.27 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    1.28 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    1.29 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    1.30 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.31 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.32 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.33 +
    1.34 +#ifndef COMMON_DWARF_BYTEREADER_H__
    1.35 +#define COMMON_DWARF_BYTEREADER_H__
    1.36 +
    1.37 +#include <string>
    1.38 +#include "common/dwarf/types.h"
    1.39 +#include "common/dwarf/dwarf2enums.h"
    1.40 +
    1.41 +namespace dwarf2reader {
    1.42 +
    1.43 +// We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN
    1.44 +// because those names conflict with existing macros.
    1.45 +enum Endianness {
    1.46 +  ENDIANNESS_BIG,
    1.47 +  ENDIANNESS_LITTLE
    1.48 +};
    1.49 +
    1.50 +// A ByteReader knows how to read single- and multi-byte values of
    1.51 +// various endiannesses, sizes, and encodings, as used in DWARF
    1.52 +// debugging information and Linux C++ exception handling data.
    1.53 +class ByteReader {
    1.54 + public:
    1.55 +  // Construct a ByteReader capable of reading one-, two-, four-, and
    1.56 +  // eight-byte values according to ENDIANNESS, absolute machine-sized
    1.57 +  // addresses, DWARF-style "initial length" values, signed and
    1.58 +  // unsigned LEB128 numbers, and Linux C++ exception handling data's
    1.59 +  // encoded pointers.
    1.60 +  explicit ByteReader(enum Endianness endianness);
    1.61 +  virtual ~ByteReader();
    1.62 +
    1.63 +  // Read a single byte from BUFFER and return it as an unsigned 8 bit
    1.64 +  // number.
    1.65 +  uint8 ReadOneByte(const char* buffer) const;
    1.66 +
    1.67 +  // Read two bytes from BUFFER and return them as an unsigned 16 bit
    1.68 +  // number, using this ByteReader's endianness.
    1.69 +  uint16 ReadTwoBytes(const char* buffer) const;
    1.70 +
    1.71 +  // Read four bytes from BUFFER and return them as an unsigned 32 bit
    1.72 +  // number, using this ByteReader's endianness. This function returns
    1.73 +  // a uint64 so that it is compatible with ReadAddress and
    1.74 +  // ReadOffset. The number it returns will never be outside the range
    1.75 +  // of an unsigned 32 bit integer.
    1.76 +  uint64 ReadFourBytes(const char* buffer) const;
    1.77 +
    1.78 +  // Read eight bytes from BUFFER and return them as an unsigned 64
    1.79 +  // bit number, using this ByteReader's endianness.
    1.80 +  uint64 ReadEightBytes(const char* buffer) const;
    1.81 +
    1.82 +  // Read an unsigned LEB128 (Little Endian Base 128) number from
    1.83 +  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
    1.84 +  // the number of bytes read.
    1.85 +  //
    1.86 +  // The unsigned LEB128 representation of an integer N is a variable
    1.87 +  // number of bytes:
    1.88 +  //
    1.89 +  // - If N is between 0 and 0x7f, then its unsigned LEB128
    1.90 +  //   representation is a single byte whose value is N.
    1.91 +  //
    1.92 +  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
    1.93 +  //   0x80, followed by the unsigned LEB128 representation of N /
    1.94 +  //   128, rounded towards negative infinity.
    1.95 +  //
    1.96 +  // In other words, we break N into groups of seven bits, put
    1.97 +  // them in little-endian order, and then write them as eight-bit
    1.98 +  // bytes with the high bit on all but the last.
    1.99 +  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
   1.100 +
   1.101 +  // Read a signed LEB128 number from BUFFER and return it as a
   1.102 +  // signed 64 bit integer. Set LEN to the number of bytes read.
   1.103 +  //
   1.104 +  // The signed LEB128 representation of an integer N is a variable
   1.105 +  // number of bytes:
   1.106 +  //
   1.107 +  // - If N is between -0x40 and 0x3f, then its signed LEB128
   1.108 +  //   representation is a single byte whose value is N in two's
   1.109 +  //   complement.
   1.110 +  //
   1.111 +  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
   1.112 +  //   0x80, followed by the signed LEB128 representation of N / 128,
   1.113 +  //   rounded towards negative infinity.
   1.114 +  //
   1.115 +  // In other words, we break N into groups of seven bits, put
   1.116 +  // them in little-endian order, and then write them as eight-bit
   1.117 +  // bytes with the high bit on all but the last.
   1.118 +  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
   1.119 +
   1.120 +  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
   1.121 +  // must be either 4 or 8. (DWARF allows addresses to be any number of
   1.122 +  // bytes in length from 1 to 255, but we only support 32- and 64-bit
   1.123 +  // addresses at the moment.) You must call this before using the
   1.124 +  // ReadAddress member function.
   1.125 +  //
   1.126 +  // For data in a .debug_info section, or something that .debug_info
   1.127 +  // refers to like line number or macro data, the compilation unit
   1.128 +  // header's address_size field indicates the address size to use. Call
   1.129 +  // frame information doesn't indicate its address size (a shortcoming of
   1.130 +  // the spec); you must supply the appropriate size based on the
   1.131 +  // architecture of the target machine.
   1.132 +  void SetAddressSize(uint8 size);
   1.133 +
   1.134 +  // Return the current address size, in bytes. This is either 4,
   1.135 +  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
   1.136 +  uint8 AddressSize() const { return address_size_; }
   1.137 +
   1.138 +  // Read an address from BUFFER and return it as an unsigned 64 bit
   1.139 +  // integer, respecting this ByteReader's endianness and address size. You
   1.140 +  // must call SetAddressSize before calling this function.
   1.141 +  uint64 ReadAddress(const char* buffer) const;
   1.142 +
   1.143 +  // DWARF actually defines two slightly different formats: 32-bit DWARF
   1.144 +  // and 64-bit DWARF. This is *not* related to the size of registers or
   1.145 +  // addresses on the target machine; it refers only to the size of section
   1.146 +  // offsets and data lengths appearing in the DWARF data. One only needs
   1.147 +  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
   1.148 +  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
   1.149 +  // debugging data itself is very large.
   1.150 +  //
   1.151 +  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
   1.152 +  // compilation unit and call frame information entry begins with an
   1.153 +  // "initial length" field, which, in addition to giving the length of the
   1.154 +  // data, also indicates the size of section offsets and lengths appearing
   1.155 +  // in that data. The ReadInitialLength member function, below, reads an
   1.156 +  // initial length and sets the ByteReader's offset size as a side effect.
   1.157 +  // Thus, in the normal process of reading DWARF data, the appropriate
   1.158 +  // offset size is set automatically. So, you should only need to call
   1.159 +  // SetOffsetSize if you are using the same ByteReader to jump from the
   1.160 +  // midst of one block of DWARF data into another.
   1.161 +
   1.162 +  // Read a DWARF "initial length" field from START, and return it as
   1.163 +  // an unsigned 64 bit integer, respecting this ByteReader's
   1.164 +  // endianness. Set *LEN to the length of the initial length in
   1.165 +  // bytes, either four or twelve. As a side effect, set this
   1.166 +  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
   1.167 +  // initial length) or 8 (if we see a 64-bit DWARF initial length).
   1.168 +  //
   1.169 +  // A DWARF initial length is either:
   1.170 +  //
   1.171 +  // - a byte count stored as an unsigned 32-bit value less than
   1.172 +  //   0xfffffff0, indicating that the data whose length is being
   1.173 +  //   measured uses the 32-bit DWARF format, or
   1.174 +  //
   1.175 +  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
   1.176 +  //   indicating that the data whose length is being measured uses
   1.177 +  //   the 64-bit DWARF format.
   1.178 +  uint64 ReadInitialLength(const char* start, size_t* len);
   1.179 +
   1.180 +  // Read an offset from BUFFER and return it as an unsigned 64 bit
   1.181 +  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
   1.182 +  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
   1.183 +  // long. You must call ReadInitialLength or SetOffsetSize before calling
   1.184 +  // this function; see the comments above for details.
   1.185 +  uint64 ReadOffset(const char* buffer) const;
   1.186 +
   1.187 +  // Return the current offset size, in bytes.
   1.188 +  // A return value of 4 indicates that we are reading 32-bit DWARF.
   1.189 +  // A return value of 8 indicates that we are reading 64-bit DWARF.
   1.190 +  uint8 OffsetSize() const { return offset_size_; }
   1.191 +
   1.192 +  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
   1.193 +  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
   1.194 +  // Usually, you should not call this function yourself; instead, let a
   1.195 +  // call to ReadInitialLength establish the data's offset size
   1.196 +  // automatically.
   1.197 +  void SetOffsetSize(uint8 size);
   1.198 +
   1.199 +  // The Linux C++ ABI uses a variant of DWARF call frame information
   1.200 +  // for exception handling. This data is included in the program's
   1.201 +  // address space as the ".eh_frame" section, and interpreted at
   1.202 +  // runtime to walk the stack, find exception handlers, and run
   1.203 +  // cleanup code. The format is mostly the same as DWARF CFI, with
   1.204 +  // some adjustments made to provide the additional
   1.205 +  // exception-handling data, and to make the data easier to work with
   1.206 +  // in memory --- for example, to allow it to be placed in read-only
   1.207 +  // memory even when describing position-independent code.
   1.208 +  //
   1.209 +  // In particular, exception handling data can select a number of
   1.210 +  // different encodings for pointers that appear in the data, as
   1.211 +  // described by the DwarfPointerEncoding enum. There are actually
   1.212 +  // four axes(!) to the encoding:
   1.213 +  //
   1.214 +  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
   1.215 +  //   the DWARF LEB128 encoding.
   1.216 +  //
   1.217 +  // - The pointer's signedness: pointers can be signed or unsigned.
   1.218 +  //
   1.219 +  // - The pointer's base address: the data stored in the exception
   1.220 +  //   handling data can be the actual address (that is, an absolute
   1.221 +  //   pointer), or relative to one of a number of different base
   1.222 +  //   addresses --- including that of the encoded pointer itself, for
   1.223 +  //   a form of "pc-relative" addressing.
   1.224 +  //
   1.225 +  // - The pointer may be indirect: it may be the address where the
   1.226 +  //   true pointer is stored. (This is used to refer to things via
   1.227 +  //   global offset table entries, program linkage table entries, or
   1.228 +  //   other tricks used in position-independent code.)
   1.229 +  //
   1.230 +  // There are also two options that fall outside that matrix
   1.231 +  // altogether: the pointer may be omitted, or it may have padding to
   1.232 +  // align it on an appropriate address boundary. (That last option
   1.233 +  // may seem like it should be just another axis, but it is not.)
   1.234 +
   1.235 +  // Indicate that the exception handling data is loaded starting at
   1.236 +  // SECTION_BASE, and that the start of its buffer in our own memory
   1.237 +  // is BUFFER_BASE. This allows us to find the address that a given
   1.238 +  // byte in our buffer would have when loaded into the program the
   1.239 +  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
   1.240 +  void SetCFIDataBase(uint64 section_base, const char *buffer_base);
   1.241 +
   1.242 +  // Indicate that the base address of the program's ".text" section
   1.243 +  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
   1.244 +  void SetTextBase(uint64 text_base);
   1.245 +
   1.246 +  // Indicate that the base address for DW_EH_PE_datarel pointers is
   1.247 +  // DATA_BASE. The proper value depends on the ABI; it is usually the
   1.248 +  // address of the global offset table, held in a designated register in
   1.249 +  // position-independent code. You will need to look at the startup code
   1.250 +  // for the target system to be sure. I tried; my eyes bled.
   1.251 +  void SetDataBase(uint64 data_base);
   1.252 +
   1.253 +  // Indicate that the base address for the FDE we are processing is
   1.254 +  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
   1.255 +  // pointers. (This encoding does not seem to be used by the GNU
   1.256 +  // toolchain.)
   1.257 +  void SetFunctionBase(uint64 function_base);
   1.258 +
   1.259 +  // Indicate that we are no longer processing any FDE, so any use of
   1.260 +  // a DW_EH_PE_funcrel encoding is an error.
   1.261 +  void ClearFunctionBase();
   1.262 +
   1.263 +  // Return true if ENCODING is a valid pointer encoding.
   1.264 +  bool ValidEncoding(DwarfPointerEncoding encoding) const;
   1.265 +
   1.266 +  // Return true if we have all the information we need to read a
   1.267 +  // pointer that uses ENCODING. This checks that the appropriate
   1.268 +  // SetFooBase function for ENCODING has been called.
   1.269 +  bool UsableEncoding(DwarfPointerEncoding encoding) const;
   1.270 +
   1.271 +  // Read an encoded pointer from BUFFER using ENCODING; return the
   1.272 +  // absolute address it represents, and set *LEN to the pointer's
   1.273 +  // length in bytes, including any padding for aligned pointers.
   1.274 +  //
   1.275 +  // This function calls 'abort' if ENCODING is invalid or refers to a
   1.276 +  // base address this reader hasn't been given, so you should check
   1.277 +  // with ValidEncoding and UsableEncoding first if you would rather
   1.278 +  // die in a more helpful way.
   1.279 +  uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding,
   1.280 +                            size_t *len) const;
   1.281 +
   1.282 + private:
   1.283 +
   1.284 +  // Function pointer type for our address and offset readers.
   1.285 +  typedef uint64 (ByteReader::*AddressReader)(const char*) const;
   1.286 +
   1.287 +  // Read an offset from BUFFER and return it as an unsigned 64 bit
   1.288 +  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
   1.289 +  // generally depending on the amount of DWARF2/3 info present.
   1.290 +  // This function pointer gets set by SetOffsetSize.
   1.291 +  AddressReader offset_reader_;
   1.292 +
   1.293 +  // Read an address from BUFFER and return it as an unsigned 64 bit
   1.294 +  // integer.  DWARF2/3 allow addresses to be any size from 0-255
   1.295 +  // bytes currently.  Internally we support 4 and 8 byte addresses,
   1.296 +  // and will CHECK on anything else.
   1.297 +  // This function pointer gets set by SetAddressSize.
   1.298 +  AddressReader address_reader_;
   1.299 +
   1.300 +  Endianness endian_;  // NOTE(review): presumably the constructor's ENDIANNESS.
   1.301 +  uint8 address_size_;  // Value returned by AddressSize().
   1.302 +  uint8 offset_size_;  // Value returned by OffsetSize().
   1.303 +
   1.304 +  // Base addresses for Linux C++ exception handling data's encoded pointers.
   1.305 +  bool have_section_base_, have_text_base_, have_data_base_;
   1.306 +  bool have_function_base_;
   1.307 +  uint64 section_base_, text_base_, data_base_, function_base_;
   1.308 +  const char *buffer_base_;
   1.309 +};
   1.310 +
   1.311 +}  // namespace dwarf2reader
   1.312 +
   1.313 +#endif  // COMMON_DWARF_BYTEREADER_H__

mercurial