// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: // Original author: Jim Blandy michael@0: michael@0: // macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and michael@0: // google_breakpad::Mach_O::Reader. See macho_reader.h for details. michael@0: michael@0: #include "common/mac/macho_reader.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: // Unfortunately, CPU_TYPE_ARM is not define for 10.4. michael@0: #if !defined(CPU_TYPE_ARM) michael@0: #define CPU_TYPE_ARM 12 michael@0: #endif michael@0: michael@0: namespace google_breakpad { michael@0: namespace mach_o { michael@0: michael@0: // If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its michael@0: // arguments, so you can't place expressions that do necessary work in michael@0: // the argument of an assert. Nor can you assign the result of the michael@0: // expression to a variable and assert that the variable's value is michael@0: // true: you'll get unused variable warnings when NDEBUG is #defined. michael@0: // michael@0: // ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that michael@0: // the result is true if NDEBUG is not #defined. 
michael@0: #if defined(NDEBUG) michael@0: #define ASSERT_ALWAYS_EVAL(x) (x) michael@0: #else michael@0: #define ASSERT_ALWAYS_EVAL(x) assert(x) michael@0: #endif michael@0: michael@0: void FatReader::Reporter::BadHeader() { michael@0: fprintf(stderr, "%s: file is neither a fat binary file" michael@0: " nor a Mach-O object file\n", filename_.c_str()); michael@0: } michael@0: michael@0: void FatReader::Reporter::TooShort() { michael@0: fprintf(stderr, "%s: file too short for the data it claims to contain\n", michael@0: filename_.c_str()); michael@0: } michael@0: michael@0: void FatReader::Reporter::MisplacedObjectFile() { michael@0: fprintf(stderr, "%s: file too short for the object files it claims" michael@0: " to contain\n", filename_.c_str()); michael@0: } michael@0: michael@0: bool FatReader::Read(const uint8_t *buffer, size_t size) { michael@0: buffer_.start = buffer; michael@0: buffer_.end = buffer + size; michael@0: ByteCursor cursor(&buffer_); michael@0: michael@0: // Fat binaries always use big-endian, so read the magic number in michael@0: // that endianness. To recognize Mach-O magic numbers, which can use michael@0: // either endianness, check for both the proper and reversed forms michael@0: // of the magic numbers. michael@0: cursor.set_big_endian(true); michael@0: if (cursor >> magic_) { michael@0: if (magic_ == FAT_MAGIC) { michael@0: // How many object files does this fat binary contain? michael@0: uint32_t object_files_count; michael@0: if (!(cursor >> object_files_count)) { // nfat_arch michael@0: reporter_->TooShort(); michael@0: return false; michael@0: } michael@0: michael@0: // Read the list of object files. michael@0: object_files_.resize(object_files_count); michael@0: for (size_t i = 0; i < object_files_count; i++) { michael@0: struct fat_arch *objfile = &object_files_[i]; michael@0: michael@0: // Read this object file entry, byte-swapping as appropriate. 
michael@0: cursor >> objfile->cputype michael@0: >> objfile->cpusubtype michael@0: >> objfile->offset michael@0: >> objfile->size michael@0: >> objfile->align; michael@0: if (!cursor) { michael@0: reporter_->TooShort(); michael@0: return false; michael@0: } michael@0: // Does the file actually have the bytes this entry refers to? michael@0: size_t fat_size = buffer_.Size(); michael@0: if (objfile->offset > fat_size || michael@0: objfile->size > fat_size - objfile->offset) { michael@0: reporter_->MisplacedObjectFile(); michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || michael@0: magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { michael@0: // If this is a little-endian Mach-O file, fix the cursor's endianness. michael@0: if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) michael@0: cursor.set_big_endian(false); michael@0: // Record the entire file as a single entry in the object file list. michael@0: object_files_.resize(1); michael@0: michael@0: // Get the cpu type and subtype from the Mach-O header. michael@0: if (!(cursor >> object_files_[0].cputype michael@0: >> object_files_[0].cpusubtype)) { michael@0: reporter_->TooShort(); michael@0: return false; michael@0: } michael@0: michael@0: object_files_[0].offset = 0; michael@0: object_files_[0].size = static_cast(buffer_.Size()); michael@0: // This alignment is correct for 32 and 64-bit x86 and ppc. 
michael@0: // See get_align in the lipo source for other architectures: michael@0: // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c michael@0: object_files_[0].align = 12; // 2^12 == 4096 michael@0: michael@0: return true; michael@0: } michael@0: } michael@0: michael@0: reporter_->BadHeader(); michael@0: return false; michael@0: } michael@0: michael@0: void Reader::Reporter::BadHeader() { michael@0: fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, michael@0: cpu_subtype_t cpu_subtype, michael@0: cpu_type_t expected_cpu_type, michael@0: cpu_subtype_t expected_cpu_subtype) { michael@0: fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" michael@0: " type %d, subtype %d\n", michael@0: filename_.c_str(), cpu_type, cpu_subtype, michael@0: expected_cpu_type, expected_cpu_subtype); michael@0: } michael@0: michael@0: void Reader::Reporter::HeaderTruncated() { michael@0: fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", michael@0: filename_.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::LoadCommandRegionTruncated() { michael@0: fprintf(stderr, "%s: file too short to hold load command region" michael@0: " given in Mach-O header\n", filename_.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, michael@0: LoadCommandType type) { michael@0: fprintf(stderr, "%s: file's header claims there are %ld" michael@0: " load commands, but load command #%ld", michael@0: filename_.c_str(), claimed, i); michael@0: if (type) fprintf(stderr, ", of type %d,", type); michael@0: fprintf(stderr, " extends beyond the end of the load command region\n"); michael@0: } michael@0: michael@0: void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { michael@0: fprintf(stderr, "%s: the contents of load command #%ld, of 
type %d," michael@0: " extend beyond the size given in the load command's header\n", michael@0: filename_.c_str(), i, type); michael@0: } michael@0: michael@0: void Reader::Reporter::SectionsMissing(const string &name) { michael@0: fprintf(stderr, "%s: the load command for segment '%s'" michael@0: " is too short to hold the section headers it claims to have\n", michael@0: filename_.c_str(), name.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::MisplacedSegmentData(const string &name) { michael@0: fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" michael@0: " the end of the file\n", filename_.c_str(), name.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::MisplacedSectionData(const string §ion, michael@0: const string &segment) { michael@0: fprintf(stderr, "%s: the section '%s' in segment '%s'" michael@0: " claims its contents lie outside the segment's contents\n", michael@0: filename_.c_str(), section.c_str(), segment.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::MisplacedSymbolTable() { michael@0: fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" michael@0: " table's contents are located beyond the end of the file\n", michael@0: filename_.c_str()); michael@0: } michael@0: michael@0: void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { michael@0: fprintf(stderr, "%s: CPU type %d is not supported\n", michael@0: filename_.c_str(), cpu_type); michael@0: } michael@0: michael@0: bool Reader::Read(const uint8_t *buffer, michael@0: size_t size, michael@0: cpu_type_t expected_cpu_type, michael@0: cpu_subtype_t expected_cpu_subtype) { michael@0: assert(!buffer_.start); michael@0: buffer_.start = buffer; michael@0: buffer_.end = buffer + size; michael@0: ByteCursor cursor(&buffer_, true); michael@0: uint32_t magic; michael@0: if (!(cursor >> magic)) { michael@0: reporter_->HeaderTruncated(); michael@0: return false; michael@0: } michael@0: michael@0: if 
(expected_cpu_type != CPU_TYPE_ANY) { michael@0: uint32_t expected_magic; michael@0: // validate that magic matches the expected cpu type michael@0: switch (expected_cpu_type) { michael@0: case CPU_TYPE_ARM: michael@0: case CPU_TYPE_I386: michael@0: expected_magic = MH_CIGAM; michael@0: break; michael@0: case CPU_TYPE_POWERPC: michael@0: expected_magic = MH_MAGIC; michael@0: break; michael@0: case CPU_TYPE_X86_64: michael@0: expected_magic = MH_CIGAM_64; michael@0: break; michael@0: case CPU_TYPE_POWERPC64: michael@0: expected_magic = MH_MAGIC_64; michael@0: break; michael@0: default: michael@0: reporter_->UnsupportedCPUType(expected_cpu_type); michael@0: return false; michael@0: } michael@0: michael@0: if (expected_magic != magic) { michael@0: reporter_->BadHeader(); michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: // Since the byte cursor is in big-endian mode, a reversed magic number michael@0: // always indicates a little-endian file, regardless of our own endianness. 
michael@0: switch (magic) { michael@0: case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; michael@0: case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; michael@0: case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; michael@0: case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; michael@0: default: michael@0: reporter_->BadHeader(); michael@0: return false; michael@0: } michael@0: cursor.set_big_endian(big_endian_); michael@0: uint32_t commands_size, reserved; michael@0: cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ michael@0: >> commands_size >> flags_; michael@0: if (bits_64_) michael@0: cursor >> reserved; michael@0: if (!cursor) { michael@0: reporter_->HeaderTruncated(); michael@0: return false; michael@0: } michael@0: michael@0: if (expected_cpu_type != CPU_TYPE_ANY && michael@0: (expected_cpu_type != cpu_type_ || michael@0: expected_cpu_subtype != cpu_subtype_)) { michael@0: reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, michael@0: expected_cpu_type, expected_cpu_subtype); michael@0: return false; michael@0: } michael@0: michael@0: cursor michael@0: .PointTo(&load_commands_.start, commands_size) michael@0: .PointTo(&load_commands_.end, 0); michael@0: if (!cursor) { michael@0: reporter_->LoadCommandRegionTruncated(); michael@0: return false; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { michael@0: ByteCursor list_cursor(&load_commands_, big_endian_); michael@0: michael@0: for (size_t index = 0; index < load_command_count_; ++index) { michael@0: // command refers to this load command alone, so that cursor will michael@0: // refuse to read past the load command's end. But since we haven't michael@0: // read the size yet, let command initially refer to the entire michael@0: // remainder of the load command series. 
michael@0: ByteBuffer command(list_cursor.here(), list_cursor.Available()); michael@0: ByteCursor cursor(&command, big_endian_); michael@0: michael@0: // Read the command type and size --- fields common to all commands. michael@0: uint32_t type, size; michael@0: if (!(cursor >> type)) { michael@0: reporter_->LoadCommandsOverrun(load_command_count_, index, 0); michael@0: return false; michael@0: } michael@0: if (!(cursor >> size) || size > command.Size()) { michael@0: reporter_->LoadCommandsOverrun(load_command_count_, index, type); michael@0: return false; michael@0: } michael@0: michael@0: // Now that we've read the length, restrict command's range to this michael@0: // load command only. michael@0: command.end = command.start + size; michael@0: michael@0: switch (type) { michael@0: case LC_SEGMENT: michael@0: case LC_SEGMENT_64: { michael@0: Segment segment; michael@0: segment.bits_64 = (type == LC_SEGMENT_64); michael@0: size_t word_size = segment.bits_64 ? 8 : 4; michael@0: cursor.CString(&segment.name, 16); michael@0: size_t file_offset, file_size; michael@0: cursor michael@0: .Read(word_size, false, &segment.vmaddr) michael@0: .Read(word_size, false, &segment.vmsize) michael@0: .Read(word_size, false, &file_offset) michael@0: .Read(word_size, false, &file_size); michael@0: cursor >> segment.maxprot michael@0: >> segment.initprot michael@0: >> segment.nsects michael@0: >> segment.flags; michael@0: if (!cursor) { michael@0: reporter_->LoadCommandTooShort(index, type); michael@0: return false; michael@0: } michael@0: if (file_offset > buffer_.Size() || michael@0: file_size > buffer_.Size() - file_offset) { michael@0: reporter_->MisplacedSegmentData(segment.name); michael@0: return false; michael@0: } michael@0: // Mach-O files in .dSYM bundles have the contents of the loaded michael@0: // segments removed, and their file offsets and file sizes zeroed michael@0: // out. 
To help us handle this special case properly, give such michael@0: // segments' contents NULL starting and ending pointers. michael@0: if (file_offset == 0 && file_size == 0) { michael@0: segment.contents.start = segment.contents.end = NULL; michael@0: } else { michael@0: segment.contents.start = buffer_.start + file_offset; michael@0: segment.contents.end = segment.contents.start + file_size; michael@0: } michael@0: // The section list occupies the remainder of this load command's space. michael@0: segment.section_list.start = cursor.here(); michael@0: segment.section_list.end = command.end; michael@0: michael@0: if (!handler->SegmentCommand(segment)) michael@0: return false; michael@0: break; michael@0: } michael@0: michael@0: case LC_SYMTAB: { michael@0: uint32_t symoff, nsyms, stroff, strsize; michael@0: cursor >> symoff >> nsyms >> stroff >> strsize; michael@0: if (!cursor) { michael@0: reporter_->LoadCommandTooShort(index, type); michael@0: return false; michael@0: } michael@0: // How big are the entries in the symbol table? michael@0: // sizeof(struct nlist_64) : sizeof(struct nlist), michael@0: // but be paranoid about alignment vs. target architecture. michael@0: size_t symbol_size = bits_64_ ? 16 : 12; michael@0: // How big is the entire symbol array? 
michael@0: size_t symbols_size = nsyms * symbol_size; michael@0: if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || michael@0: stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { michael@0: reporter_->MisplacedSymbolTable(); michael@0: return false; michael@0: } michael@0: ByteBuffer entries(buffer_.start + symoff, symbols_size); michael@0: ByteBuffer names(buffer_.start + stroff, strsize); michael@0: if (!handler->SymtabCommand(entries, names)) michael@0: return false; michael@0: break; michael@0: } michael@0: michael@0: default: { michael@0: if (!handler->UnknownCommand(type, command)) michael@0: return false; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: list_cursor.set_here(command.end); michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // A load command handler that looks for a segment of a given name. michael@0: class Reader::SegmentFinder : public LoadCommandHandler { michael@0: public: michael@0: // Create a load command handler that looks for a segment named NAME, michael@0: // and sets SEGMENT to describe it if found. michael@0: SegmentFinder(const string &name, Segment *segment) michael@0: : name_(name), segment_(segment), found_() { } michael@0: michael@0: // Return true if the traversal found the segment, false otherwise. michael@0: bool found() const { return found_; } michael@0: michael@0: bool SegmentCommand(const Segment &segment) { michael@0: if (segment.name == name_) { michael@0: *segment_ = segment; michael@0: found_ = true; michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: private: michael@0: // The name of the segment our creator is looking for. michael@0: const string &name_; michael@0: michael@0: // Where we should store the segment if found. (WEAK) michael@0: Segment *segment_; michael@0: michael@0: // True if we found the segment. 
michael@0: bool found_; michael@0: }; michael@0: michael@0: bool Reader::FindSegment(const string &name, Segment *segment) const { michael@0: SegmentFinder finder(name, segment); michael@0: WalkLoadCommands(&finder); michael@0: return finder.found(); michael@0: } michael@0: michael@0: bool Reader::WalkSegmentSections(const Segment &segment, michael@0: SectionHandler *handler) const { michael@0: size_t word_size = segment.bits_64 ? 8 : 4; michael@0: ByteCursor cursor(&segment.section_list, big_endian_); michael@0: michael@0: for (size_t i = 0; i < segment.nsects; i++) { michael@0: Section section; michael@0: section.bits_64 = segment.bits_64; michael@0: uint64_t size; michael@0: uint32_t offset, dummy32; michael@0: cursor michael@0: .CString(§ion.section_name, 16) michael@0: .CString(§ion.segment_name, 16) michael@0: .Read(word_size, false, §ion.address) michael@0: .Read(word_size, false, &size) michael@0: >> offset michael@0: >> section.align michael@0: >> dummy32 michael@0: >> dummy32 michael@0: >> section.flags michael@0: >> dummy32 michael@0: >> dummy32; michael@0: if (section.bits_64) michael@0: cursor >> dummy32; michael@0: if (!cursor) { michael@0: reporter_->SectionsMissing(segment.name); michael@0: return false; michael@0: } michael@0: if ((section.flags & SECTION_TYPE) == S_ZEROFILL) { michael@0: // Zero-fill sections have a size, but no contents. michael@0: section.contents.start = section.contents.end = NULL; michael@0: } else if (segment.contents.start == NULL && michael@0: segment.contents.end == NULL) { michael@0: // Mach-O files in .dSYM bundles have the contents of the loaded michael@0: // segments removed, and their file offsets and file sizes zeroed michael@0: // out. However, the sections within those segments still have michael@0: // non-zero sizes. There's no reason to call MisplacedSectionData in michael@0: // this case; the caller may just need the section's load michael@0: // address. But do set the contents' limits to NULL, for safety. 
michael@0: section.contents.start = section.contents.end = NULL; michael@0: } else { michael@0: if (offset < size_t(segment.contents.start - buffer_.start) || michael@0: offset > size_t(segment.contents.end - buffer_.start) || michael@0: size > size_t(segment.contents.end - buffer_.start - offset)) { michael@0: reporter_->MisplacedSectionData(section.section_name, michael@0: section.segment_name); michael@0: return false; michael@0: } michael@0: section.contents.start = buffer_.start + offset; michael@0: section.contents.end = section.contents.start + size; michael@0: } michael@0: if (!handler->HandleSection(section)) michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // A SectionHandler that builds a SectionMap for the sections within a michael@0: // given segment. michael@0: class Reader::SectionMapper: public SectionHandler { michael@0: public: michael@0: // Create a SectionHandler that populates MAP with an entry for michael@0: // each section it is given. michael@0: SectionMapper(SectionMap *map) : map_(map) { } michael@0: bool HandleSection(const Section §ion) { michael@0: (*map_)[section.section_name] = section; michael@0: return true; michael@0: } michael@0: private: michael@0: // The map under construction. (WEAK) michael@0: SectionMap *map_; michael@0: }; michael@0: michael@0: bool Reader::MapSegmentSections(const Segment &segment, michael@0: SectionMap *section_map) const { michael@0: section_map->clear(); michael@0: SectionMapper mapper(section_map); michael@0: return WalkSegmentSections(segment, &mapper); michael@0: } michael@0: michael@0: } // namespace mach_o michael@0: } // namespace google_breakpad