// -*- mode: C++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: #ifndef COMMON_DWARF_BYTEREADER_H__ michael@0: #define COMMON_DWARF_BYTEREADER_H__ michael@0: michael@0: #include michael@0: #include "common/dwarf/types.h" michael@0: #include "common/dwarf/dwarf2enums.h" michael@0: michael@0: namespace dwarf2reader { michael@0: michael@0: // We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN michael@0: // because it conflicts with a macro michael@0: enum Endianness { michael@0: ENDIANNESS_BIG, michael@0: ENDIANNESS_LITTLE michael@0: }; michael@0: michael@0: // A ByteReader knows how to read single- and multi-byte values of michael@0: // various endiannesses, sizes, and encodings, as used in DWARF michael@0: // debugging information and Linux C++ exception handling data. michael@0: class ByteReader { michael@0: public: michael@0: // Construct a ByteReader capable of reading one-, two-, four-, and michael@0: // eight-byte values according to ENDIANNESS, absolute machine-sized michael@0: // addresses, DWARF-style "initial length" values, signed and michael@0: // unsigned LEB128 numbers, and Linux C++ exception handling data's michael@0: // encoded pointers. michael@0: explicit ByteReader(enum Endianness endianness); michael@0: virtual ~ByteReader(); michael@0: michael@0: // Read a single byte from BUFFER and return it as an unsigned 8 bit michael@0: // number. michael@0: uint8 ReadOneByte(const char* buffer) const; michael@0: michael@0: // Read two bytes from BUFFER and return them as an unsigned 16 bit michael@0: // number, using this ByteReader's endianness. michael@0: uint16 ReadTwoBytes(const char* buffer) const; michael@0: michael@0: // Read four bytes from BUFFER and return them as an unsigned 32 bit michael@0: // number, using this ByteReader's endianness. This function returns michael@0: // a uint64 so that it is compatible with ReadAddress and michael@0: // ReadOffset. The number it returns will never be outside the range michael@0: // of an unsigned 32 bit integer. 
michael@0: uint64 ReadFourBytes(const char* buffer) const; michael@0: michael@0: // Read eight bytes from BUFFER and return them as an unsigned 64 michael@0: // bit number, using this ByteReader's endianness. michael@0: uint64 ReadEightBytes(const char* buffer) const; michael@0: michael@0: // Read an unsigned LEB128 (Little Endian Base 128) number from michael@0: // BUFFER and return it as an unsigned 64 bit integer. Set LEN to michael@0: // the number of bytes read. michael@0: // michael@0: // The unsigned LEB128 representation of an integer N is a variable michael@0: // number of bytes: michael@0: // michael@0: // - If N is between 0 and 0x7f, then its unsigned LEB128 michael@0: // representation is a single byte whose value is N. michael@0: // michael@0: // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | michael@0: // 0x80, followed by the unsigned LEB128 representation of N / michael@0: // 128, rounded towards negative infinity. michael@0: // michael@0: // In other words, we break VALUE into groups of seven bits, put michael@0: // them in little-endian order, and then write them as eight-bit michael@0: // bytes with the high bit on all but the last. michael@0: uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; michael@0: michael@0: // Read a signed LEB128 number from BUFFER and return it as an michael@0: // signed 64 bit integer. Set LEN to the number of bytes read. michael@0: // michael@0: // The signed LEB128 representation of an integer N is a variable michael@0: // number of bytes: michael@0: // michael@0: // - If N is between -0x40 and 0x3f, then its signed LEB128 michael@0: // representation is a single byte whose value is N in two's michael@0: // complement. michael@0: // michael@0: // - Otherwise, its signed LEB128 representation is (N & 0x7f) | michael@0: // 0x80, followed by the signed LEB128 representation of N / 128, michael@0: // rounded towards negative infinity. 
michael@0: // michael@0: // In other words, we break VALUE into groups of seven bits, put michael@0: // them in little-endian order, and then write them as eight-bit michael@0: // bytes with the high bit on all but the last. michael@0: int64 ReadSignedLEB128(const char* buffer, size_t* len) const; michael@0: michael@0: // Indicate that addresses on this architecture are SIZE bytes long. SIZE michael@0: // must be either 4 or 8. (DWARF allows addresses to be any number of michael@0: // bytes in length from 1 to 255, but we only support 32- and 64-bit michael@0: // addresses at the moment.) You must call this before using the michael@0: // ReadAddress member function. michael@0: // michael@0: // For data in a .debug_info section, or something that .debug_info michael@0: // refers to like line number or macro data, the compilation unit michael@0: // header's address_size field indicates the address size to use. Call michael@0: // frame information doesn't indicate its address size (a shortcoming of michael@0: // the spec); you must supply the appropriate size based on the michael@0: // architecture of the target machine. michael@0: void SetAddressSize(uint8 size); michael@0: michael@0: // Return the current address size, in bytes. This is either 4, michael@0: // indicating 32-bit addresses, or 8, indicating 64-bit addresses. michael@0: uint8 AddressSize() const { return address_size_; } michael@0: michael@0: // Read an address from BUFFER and return it as an unsigned 64 bit michael@0: // integer, respecting this ByteReader's endianness and address size. You michael@0: // must call SetAddressSize before calling this function. michael@0: uint64 ReadAddress(const char* buffer) const; michael@0: michael@0: // DWARF actually defines two slightly different formats: 32-bit DWARF michael@0: // and 64-bit DWARF. 
This is *not* related to the size of registers or michael@0: // addresses on the target machine; it refers only to the size of section michael@0: // offsets and data lengths appearing in the DWARF data. One only needs michael@0: // 64-bit DWARF when the debugging data itself is larger than 4GiB. michael@0: // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the michael@0: // debugging data itself is very large. michael@0: // michael@0: // DWARF information identifies itself as 32-bit or 64-bit DWARF: each michael@0: // compilation unit and call frame information entry begins with an michael@0: // "initial length" field, which, in addition to giving the length of the michael@0: // data, also indicates the size of section offsets and lengths appearing michael@0: // in that data. The ReadInitialLength member function, below, reads an michael@0: // initial length and sets the ByteReader's offset size as a side effect. michael@0: // Thus, in the normal process of reading DWARF data, the appropriate michael@0: // offset size is set automatically. So, you should only need to call michael@0: // SetOffsetSize if you are using the same ByteReader to jump from the michael@0: // midst of one block of DWARF data into another. michael@0: michael@0: // Read a DWARF "initial length" field from START, and return it as michael@0: // an unsigned 64 bit integer, respecting this ByteReader's michael@0: // endianness. Set *LEN to the length of the initial length in michael@0: // bytes, either four or twelve. As a side effect, set this michael@0: // ByteReader's offset size to either 4 (if we see a 32-bit DWARF michael@0: // initial length) or 8 (if we see a 64-bit DWARF initial length). 
michael@0: // michael@0: // A DWARF initial length is either: michael@0: // michael@0: // - a byte count stored as an unsigned 32-bit value less than michael@0: // 0xffffff00, indicating that the data whose length is being michael@0: // measured uses the 32-bit DWARF format, or michael@0: // michael@0: // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, michael@0: // indicating that the data whose length is being measured uses michael@0: // the 64-bit DWARF format. michael@0: uint64 ReadInitialLength(const char* start, size_t* len); michael@0: michael@0: // Read an offset from BUFFER and return it as an unsigned 64 bit michael@0: // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the michael@0: // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes michael@0: // long. You must call ReadInitialLength or SetOffsetSize before calling michael@0: // this function; see the comments above for details. michael@0: uint64 ReadOffset(const char* buffer) const; michael@0: michael@0: // Return the current offset size, in bytes. michael@0: // A return value of 4 indicates that we are reading 32-bit DWARF. michael@0: // A return value of 8 indicates that we are reading 64-bit DWARF. michael@0: uint8 OffsetSize() const { return offset_size_; } michael@0: michael@0: // Indicate that section offsets and lengths are SIZE bytes long. SIZE michael@0: // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). michael@0: // Usually, you should not call this function yourself; instead, let a michael@0: // call to ReadInitialLength establish the data's offset size michael@0: // automatically. michael@0: void SetOffsetSize(uint8 size); michael@0: michael@0: // The Linux C++ ABI uses a variant of DWARF call frame information michael@0: // for exception handling. 
This data is included in the program's michael@0: // address space as the ".eh_frame" section, and intepreted at michael@0: // runtime to walk the stack, find exception handlers, and run michael@0: // cleanup code. The format is mostly the same as DWARF CFI, with michael@0: // some adjustments made to provide the additional michael@0: // exception-handling data, and to make the data easier to work with michael@0: // in memory --- for example, to allow it to be placed in read-only michael@0: // memory even when describing position-independent code. michael@0: // michael@0: // In particular, exception handling data can select a number of michael@0: // different encodings for pointers that appear in the data, as michael@0: // described by the DwarfPointerEncoding enum. There are actually michael@0: // four axes(!) to the encoding: michael@0: // michael@0: // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use michael@0: // the DWARF LEB128 encoding. michael@0: // michael@0: // - The pointer's signedness: pointers can be signed or unsigned. michael@0: // michael@0: // - The pointer's base address: the data stored in the exception michael@0: // handling data can be the actual address (that is, an absolute michael@0: // pointer), or relative to one of a number of different base michael@0: // addreses --- including that of the encoded pointer itself, for michael@0: // a form of "pc-relative" addressing. michael@0: // michael@0: // - The pointer may be indirect: it may be the address where the michael@0: // true pointer is stored. (This is used to refer to things via michael@0: // global offset table entries, program linkage table entries, or michael@0: // other tricks used in position-independent code.) michael@0: // michael@0: // There are also two options that fall outside that matrix michael@0: // altogether: the pointer may be omitted, or it may have padding to michael@0: // align it on an appropriate address boundary. 
(That last option michael@0: // may seem like it should be just another axis, but it is not.) michael@0: michael@0: // Indicate that the exception handling data is loaded starting at michael@0: // SECTION_BASE, and that the start of its buffer in our own memory michael@0: // is BUFFER_BASE. This allows us to find the address that a given michael@0: // byte in our buffer would have when loaded into the program the michael@0: // data describes. We need this to resolve DW_EH_PE_pcrel pointers. michael@0: void SetCFIDataBase(uint64 section_base, const char *buffer_base); michael@0: michael@0: // Indicate that the base address of the program's ".text" section michael@0: // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. michael@0: void SetTextBase(uint64 text_base); michael@0: michael@0: // Indicate that the base address for DW_EH_PE_datarel pointers is michael@0: // DATA_BASE. The proper value depends on the ABI; it is usually the michael@0: // address of the global offset table, held in a designated register in michael@0: // position-independent code. You will need to look at the startup code michael@0: // for the target system to be sure. I tried; my eyes bled. michael@0: void SetDataBase(uint64 data_base); michael@0: michael@0: // Indicate that the base address for the FDE we are processing is michael@0: // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel michael@0: // pointers. (This encoding does not seem to be used by the GNU michael@0: // toolchain.) michael@0: void SetFunctionBase(uint64 function_base); michael@0: michael@0: // Indicate that we are no longer processing any FDE, so any use of michael@0: // a DW_EH_PE_funcrel encoding is an error. michael@0: void ClearFunctionBase(); michael@0: michael@0: // Return true if ENCODING is a valid pointer encoding. 
michael@0: bool ValidEncoding(DwarfPointerEncoding encoding) const; michael@0: michael@0: // Return true if we have all the information we need to read a michael@0: // pointer that uses ENCODING. This checks that the appropriate michael@0: // SetFooBase function for ENCODING has been called. michael@0: bool UsableEncoding(DwarfPointerEncoding encoding) const; michael@0: michael@0: // Read an encoded pointer from BUFFER using ENCODING; return the michael@0: // absolute address it represents, and set *LEN to the pointer's michael@0: // length in bytes, including any padding for aligned pointers. michael@0: // michael@0: // This function calls 'abort' if ENCODING is invalid or refers to a michael@0: // base address this reader hasn't been given, so you should check michael@0: // with ValidEncoding and UsableEncoding first if you would rather michael@0: // die in a more helpful way. michael@0: uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, michael@0: size_t *len) const; michael@0: michael@0: private: michael@0: michael@0: // Function pointer type for our address and offset readers. michael@0: typedef uint64 (ByteReader::*AddressReader)(const char*) const; michael@0: michael@0: // Read an offset from BUFFER and return it as an unsigned 64 bit michael@0: // integer. DWARF2/3 define offsets as either 4 or 8 bytes, michael@0: // generally depending on the amount of DWARF2/3 info present. michael@0: // This function pointer gets set by SetOffsetSize. michael@0: AddressReader offset_reader_; michael@0: michael@0: // Read an address from BUFFER and return it as an unsigned 64 bit michael@0: // integer. DWARF2/3 allow addresses to be any size from 0-255 michael@0: // bytes currently. Internally we support 4 and 8 byte addresses, michael@0: // and will CHECK on anything else. michael@0: // This function pointer gets set by SetAddressSize. 
michael@0: AddressReader address_reader_; michael@0: michael@0: Endianness endian_; michael@0: uint8 address_size_; michael@0: uint8 offset_size_; michael@0: michael@0: // Base addresses for Linux C++ exception handling data's encoded pointers. michael@0: bool have_section_base_, have_text_base_, have_data_base_; michael@0: bool have_function_base_; michael@0: uint64 section_base_, text_base_, data_base_, function_base_; michael@0: const char *buffer_base_; michael@0: }; michael@0: michael@0: } // namespace dwarf2reader michael@0: michael@0: #endif // COMMON_DWARF_BYTEREADER_H__