michael@0: // Copyright (c) 2010 Google Inc. All Rights Reserved.
michael@0: //
michael@0: // Redistribution and use in source and binary forms, with or without
michael@0: // modification, are permitted provided that the following conditions are
michael@0: // met:
michael@0: //
michael@0: //     * Redistributions of source code must retain the above copyright
michael@0: // notice, this list of conditions and the following disclaimer.
michael@0: //     * Redistributions in binary form must reproduce the above
michael@0: // copyright notice, this list of conditions and the following disclaimer
michael@0: // in the documentation and/or other materials provided with the
michael@0: // distribution.
michael@0: //     * Neither the name of Google Inc. nor the names of its
michael@0: // contributors may be used to endorse or promote products derived from
michael@0: // this software without specific prior written permission.
michael@0: //
michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: 
michael@0: #include <assert.h>
michael@0: #include <stdlib.h>
michael@0: 
michael@0: #include "common/dwarf/bytereader-inl.h"
michael@0: #include "common/dwarf/bytereader.h"
michael@0: 
michael@0: namespace dwarf2reader {
michael@0: 
michael@0: ByteReader::ByteReader(enum Endianness endian)
michael@0:     :offset_reader_(NULL), address_reader_(NULL), endian_(endian),
michael@0:      address_size_(0), offset_size_(0),
michael@0:      have_section_base_(), have_text_base_(), have_data_base_(),
michael@0:      have_function_base_() { }
michael@0: 
michael@0: ByteReader::~ByteReader() { }
michael@0: 
michael@0: void ByteReader::SetOffsetSize(uint8 size) {
michael@0:   offset_size_ = size;
michael@0:   assert(size == 4 || size == 8);
michael@0:   if (size == 4) {
michael@0:     this->offset_reader_ = &ByteReader::ReadFourBytes;
michael@0:   } else {
michael@0:     this->offset_reader_ = &ByteReader::ReadEightBytes;
michael@0:   }
michael@0: }
michael@0: 
michael@0: void ByteReader::SetAddressSize(uint8 size) {
michael@0:   address_size_ = size;
michael@0:   assert(size == 4 || size == 8);
michael@0:   if (size == 4) {
michael@0:     this->address_reader_ = &ByteReader::ReadFourBytes;
michael@0:   } else {
michael@0:     this->address_reader_ = &ByteReader::ReadEightBytes;
michael@0:   }
michael@0: }
michael@0: 
michael@0: uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) {
michael@0:   const uint64 initial_length = ReadFourBytes(start);
michael@0:   start += 4;
michael@0: 
michael@0:   // In DWARF2/3, if the initial length is all 1 bits, then the offset
michael@0:   // size is 8 and we need to read the next 8 bytes for the real length.
michael@0:   if (initial_length == 0xffffffff) {
michael@0:     SetOffsetSize(8);
michael@0:     *len = 12;
michael@0:     return ReadOffset(start);
michael@0:   } else {
michael@0:     SetOffsetSize(4);
michael@0:     *len = 4;
michael@0:   }
michael@0:   return initial_length;
michael@0: }
michael@0: 
michael@0: bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
michael@0:   if (encoding == DW_EH_PE_omit) return true;
michael@0:   if (encoding == DW_EH_PE_aligned) return true;
michael@0:   if ((encoding & 0x7) > DW_EH_PE_udata8)
michael@0:     return false;
michael@0:   if ((encoding & 0x70) > DW_EH_PE_funcrel)
michael@0:     return false;
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
michael@0:   switch (encoding & 0x70) {
michael@0:     case DW_EH_PE_absptr:  return true;
michael@0:     case DW_EH_PE_pcrel:   return have_section_base_;
michael@0:     case DW_EH_PE_textrel: return have_text_base_;
michael@0:     case DW_EH_PE_datarel: return have_data_base_;
michael@0:     case DW_EH_PE_funcrel: return have_function_base_;
michael@0:     default:               return false;
michael@0:   }
michael@0: }
michael@0: 
michael@0: uint64 ByteReader::ReadEncodedPointer(const char *buffer,
michael@0:                                       DwarfPointerEncoding encoding,
michael@0:                                       size_t *len) const {
michael@0:   // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
michael@0:   // see it here.
michael@0:   assert(encoding != DW_EH_PE_omit);
michael@0: 
michael@0:   // The Linux Standards Base 4.0 does not make this clear, but the
michael@0:   // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
michael@0:   // agree that aligned pointers are always absolute, machine-sized,
michael@0:   // machine-signed pointers.
michael@0:   if (encoding == DW_EH_PE_aligned) {
michael@0:     assert(have_section_base_);
michael@0: 
michael@0:     // We don't need to align BUFFER in *our* address space. Rather, we
michael@0:     // need to find the next position in our buffer that would be aligned
michael@0:     // when the .eh_frame section the buffer contains is loaded into the
michael@0:     // program's memory. So align assuming that buffer_base_ gets loaded at
michael@0:     // address section_base_, where section_base_ itself may or may not be
michael@0:     // aligned.
michael@0: 
michael@0:     // First, find the offset to START from the closest prior aligned
michael@0:     // address.
michael@0:     uint64 skew = section_base_ & (AddressSize() - 1);
michael@0:     // Now find the offset from that aligned address to buffer.
michael@0:     uint64 offset = skew + (buffer - buffer_base_);
michael@0:     // Round up to the next boundary.
michael@0:     uint64 aligned = (offset + AddressSize() - 1) & -AddressSize();
michael@0:     // Convert back to a pointer.
michael@0:     const char *aligned_buffer = buffer_base_ + (aligned - skew);
michael@0:     // Finally, store the length and actually fetch the pointer.
michael@0:     *len = aligned_buffer - buffer + AddressSize();
michael@0:     return ReadAddress(aligned_buffer);
michael@0:   }
michael@0: 
michael@0:   // Extract the value first, ignoring whether it's a pointer or an
michael@0:   // offset relative to some base.
michael@0:   uint64 offset;
michael@0:   switch (encoding & 0x0f) {
michael@0:     case DW_EH_PE_absptr:
michael@0:       // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
michael@0:       // both the high and low nybble of encoding bytes. When it appears in
michael@0:       // the high nybble, it means that the pointer is absolute, not an
michael@0:       // offset from some base address. When it appears in the low nybble,
michael@0:       // as here, it means that the pointer is stored as a normal
michael@0:       // machine-sized and machine-signed address. A low nybble of
michael@0:       // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
michael@0:       // correct for us to treat the value as an offset from a base address
michael@0:       // if the upper nybble is not DW_EH_PE_absptr.
michael@0:       offset = ReadAddress(buffer);
michael@0:       *len = AddressSize();
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_uleb128:
michael@0:       offset = ReadUnsignedLEB128(buffer, len);
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_udata2:
michael@0:       offset = ReadTwoBytes(buffer);
michael@0:       *len = 2;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_udata4:
michael@0:       offset = ReadFourBytes(buffer);
michael@0:       *len = 4;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_udata8:
michael@0:       offset = ReadEightBytes(buffer);
michael@0:       *len = 8;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_sleb128:
michael@0:       offset = ReadSignedLEB128(buffer, len);
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_sdata2:
michael@0:       offset = ReadTwoBytes(buffer);
michael@0:       // Sign-extend from 16 bits.
michael@0:       offset = (offset ^ 0x8000) - 0x8000;
michael@0:       *len = 2;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_sdata4:
michael@0:       offset = ReadFourBytes(buffer);
michael@0:       // Sign-extend from 32 bits.
michael@0:       offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
michael@0:       *len = 4;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_sdata8:
michael@0:       // No need to sign-extend; this is the full width of our type.
michael@0:       offset = ReadEightBytes(buffer);
michael@0:       *len = 8;
michael@0:       break;
michael@0: 
michael@0:     default:
michael@0:       abort();
michael@0:   }
michael@0: 
michael@0:   // Find the appropriate base address.
michael@0:   uint64 base;
michael@0:   switch (encoding & 0x70) {
michael@0:     case DW_EH_PE_absptr:
michael@0:       base = 0;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_pcrel:
michael@0:       assert(have_section_base_);
michael@0:       base = section_base_ + (buffer - buffer_base_);
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_textrel:
michael@0:       assert(have_text_base_);
michael@0:       base = text_base_;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_datarel:
michael@0:       assert(have_data_base_);
michael@0:       base = data_base_;
michael@0:       break;
michael@0: 
michael@0:     case DW_EH_PE_funcrel:
michael@0:       assert(have_function_base_);
michael@0:       base = function_base_;
michael@0:       break;
michael@0: 
michael@0:     default:
michael@0:       abort();
michael@0:   }
michael@0: 
michael@0:   uint64 pointer = base + offset;
michael@0: 
michael@0:   // Remove inappropriate upper bits.
michael@0:   if (AddressSize() == 4)
michael@0:     pointer = pointer & 0xffffffff;
michael@0:   else
michael@0:     assert(AddressSize() == sizeof(uint64));
michael@0: 
michael@0:   return pointer;
michael@0: }
michael@0: 
michael@0: }  // namespace dwarf2reader