// -*- mode: C++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: // Original author: Jim Blandy michael@0: michael@0: // macho_reader.h: A class for parsing Mach-O files. michael@0: michael@0: #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ michael@0: #define BREAKPAD_COMMON_MAC_MACHO_READER_H_ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "common/byte_cursor.h" michael@0: michael@0: namespace google_breakpad { michael@0: namespace mach_o { michael@0: michael@0: using std::map; michael@0: using std::string; michael@0: using std::vector; michael@0: michael@0: // The Mac headers don't specify particular types for these groups of michael@0: // constants, but defining them here provides some documentation michael@0: // value. We also give them the same width as the fields in which michael@0: // they appear, which makes them a bit easier to use with ByteCursors. michael@0: typedef uint32_t Magic; michael@0: typedef uint32_t FileType; michael@0: typedef uint32_t FileFlags; michael@0: typedef uint32_t LoadCommandType; michael@0: typedef uint32_t SegmentFlags; michael@0: typedef uint32_t SectionFlags; michael@0: michael@0: // A parser for fat binary files, used to store universal binaries. michael@0: // When applied to a (non-fat) Mach-O file, this behaves as if the michael@0: // file were a fat file containing a single object file. michael@0: class FatReader { michael@0: public: michael@0: michael@0: // A class for reporting errors found while parsing fat binary files. The michael@0: // default definitions of these methods print messages to stderr. michael@0: class Reporter { michael@0: public: michael@0: // Create a reporter that attributes problems to |filename|. 
michael@0: explicit Reporter(const string &filename) : filename_(filename) { } michael@0: michael@0: virtual ~Reporter() { } michael@0: michael@0: // The data does not begin with a fat binary or Mach-O magic number. michael@0: // This is a fatal error. michael@0: virtual void BadHeader(); michael@0: michael@0: // The Mach-O fat binary file ends abruptly, without enough space michael@0: // to contain an object file it claims is present. michael@0: virtual void MisplacedObjectFile(); michael@0: michael@0: // The file ends abruptly: either it is not large enough to hold a michael@0: // complete header, or the header implies that contents are present michael@0: // beyond the actual end of the file. michael@0: virtual void TooShort(); michael@0: michael@0: private: michael@0: // The filename to which the reader should attribute problems. michael@0: string filename_; michael@0: }; michael@0: michael@0: // Create a fat binary file reader that uses |reporter| to report problems. michael@0: explicit FatReader(Reporter *reporter) : reporter_(reporter) { } michael@0: michael@0: // Read the |size| bytes at |buffer| as a fat binary file. On success, michael@0: // return true; on failure, report the problem to reporter_ and return michael@0: // false. michael@0: // michael@0: // If the data is a plain Mach-O file, rather than a fat binary file, michael@0: // then the reader behaves as if it had found a fat binary file whose michael@0: // single object file is the Mach-O file. michael@0: bool Read(const uint8_t *buffer, size_t size); michael@0: michael@0: // Return an array of 'struct fat_arch' structures describing the michael@0: // object files present in this fat binary file. Set |size| to the michael@0: // number of elements in the array. michael@0: // michael@0: // Assuming Read returned true, the entries are validated: it is michael@0: // safe to assume that the offsets and sizes in each 'struct michael@0: // fat_arch' refer to subranges of the bytes passed to Read. 
michael@0: // michael@0: // If there are no object files in this fat binary, then this michael@0: // function can return NULL. michael@0: // michael@0: // The array is owned by this FatReader instance; it will be freed when michael@0: // this FatReader is destroyed. michael@0: // michael@0: // This function returns a C-style array instead of a vector to make it michael@0: // possible to use the result with OS X functions like NXFindBestFatArch, michael@0: // so that the symbol dumper will behave consistently with other OS X michael@0: // utilities that work with fat binaries. michael@0: const struct fat_arch *object_files(size_t *count) const { michael@0: *count = object_files_.size(); michael@0: if (object_files_.size() > 0) michael@0: return &object_files_[0]; michael@0: return NULL; michael@0: } michael@0: michael@0: private: michael@0: // We use this to report problems parsing the file's contents. (WEAK) michael@0: Reporter *reporter_; michael@0: michael@0: // The contents of the fat binary or Mach-O file we're parsing. We do not michael@0: // own the storage it refers to. michael@0: ByteBuffer buffer_; michael@0: michael@0: // The magic number of this binary, in host byte order. michael@0: Magic magic_; michael@0: michael@0: // The list of object files in this binary. michael@0: // object_files_.size() == fat_header.nfat_arch michael@0: vector object_files_; michael@0: }; michael@0: michael@0: // A segment in a Mach-O file. All these fields have been byte-swapped as michael@0: // appropriate for use by the executing architecture. michael@0: struct Segment { michael@0: // The ByteBuffers below point into the bytes passed to the Reader that michael@0: // created this Segment. michael@0: michael@0: ByteBuffer section_list; // This segment's section list. michael@0: ByteBuffer contents; // This segment's contents. michael@0: michael@0: // This segment's name. 
michael@0: string name; michael@0: michael@0: // The address at which this segment should be loaded in memory. If michael@0: // bits_64 is false, only the bottom 32 bits of this value are valid. michael@0: uint64_t vmaddr; michael@0: michael@0: // The size of this segment when loaded into memory. This may be larger michael@0: // than contents.Size(), in which case the extra area will be michael@0: // initialized with zeros. If bits_64 is false, only the bottom 32 bits michael@0: // of this value are valid. michael@0: uint64_t vmsize; michael@0: michael@0: // The maximum and initial VM protection of this segment's contents. michael@0: uint32_t maxprot; michael@0: uint32_t initprot; michael@0: michael@0: // The number of sections in section_list. michael@0: uint32_t nsects; michael@0: michael@0: // Flags describing this segment, from SegmentFlags. michael@0: uint32_t flags; michael@0: michael@0: // True if this is a 64-bit section; false if it is a 32-bit section. michael@0: bool bits_64; michael@0: }; michael@0: michael@0: // A section in a Mach-O file. All these fields have been byte-swapped as michael@0: // appropriate for use by the executing architecture. michael@0: struct Section { michael@0: // This section's contents. This points into the bytes passed to the michael@0: // Reader that created this Section. michael@0: ByteBuffer contents; michael@0: michael@0: // This section's name. michael@0: string section_name; // section[_64].sectname michael@0: // The name of the segment this section belongs to. michael@0: string segment_name; // section[_64].segname michael@0: michael@0: // The address at which this section's contents should be loaded in michael@0: // memory. If bits_64 is false, only the bottom 32 bits of this value michael@0: // are valid. michael@0: uint64_t address; michael@0: michael@0: // The contents of this section should be loaded into memory at an michael@0: // address which is a multiple of (two raised to this power). 
michael@0: uint32_t align; michael@0: michael@0: // Flags from SectionFlags describing the section's contents. michael@0: uint32_t flags; michael@0: michael@0: // We don't support reading relocations yet. michael@0: michael@0: // True if this is a 64-bit section; false if it is a 32-bit section. michael@0: bool bits_64; michael@0: }; michael@0: michael@0: // A map from section names to Sections. michael@0: typedef map SectionMap; michael@0: michael@0: // A reader for a Mach-O file. michael@0: // michael@0: // This does not handle fat binaries; see FatReader above. FatReader michael@0: // provides a friendly interface for parsing data that could be either a michael@0: // fat binary or a Mach-O file. michael@0: class Reader { michael@0: public: michael@0: michael@0: // A class for reporting errors found while parsing Mach-O files. The michael@0: // default definitions of these member functions print messages to michael@0: // stderr. michael@0: class Reporter { michael@0: public: michael@0: // Create a reporter that attributes problems to |filename|. michael@0: explicit Reporter(const string &filename) : filename_(filename) { } michael@0: virtual ~Reporter() { } michael@0: michael@0: // Reporter functions for fatal errors return void; the reader will michael@0: // definitely return an error to its caller after calling them michael@0: michael@0: // The data does not begin with a Mach-O magic number, or the magic michael@0: // number does not match the expected value for the cpu architecture. michael@0: // This is a fatal error. michael@0: virtual void BadHeader(); michael@0: michael@0: // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) michael@0: // does not match the expected CPU architecture michael@0: // (|expected_cpu_type|, |expected_cpu_subtype|). 
michael@0: virtual void CPUTypeMismatch(cpu_type_t cpu_type, michael@0: cpu_subtype_t cpu_subtype, michael@0: cpu_type_t expected_cpu_type, michael@0: cpu_subtype_t expected_cpu_subtype); michael@0: michael@0: // The file ends abruptly: either it is not large enough to hold a michael@0: // complete header, or the header implies that contents are present michael@0: // beyond the actual end of the file. michael@0: virtual void HeaderTruncated(); michael@0: michael@0: // The file's load command region, as given in the Mach-O header, is michael@0: // too large for the file. michael@0: virtual void LoadCommandRegionTruncated(); michael@0: michael@0: // The file's Mach-O header claims the file contains |claimed| load michael@0: // commands, but the I'th load command, of type |type|, extends beyond michael@0: // the end of the load command region, as given by the Mach-O header. michael@0: // If |type| is zero, the command's type was unreadable. michael@0: virtual void LoadCommandsOverrun(size_t claimed, size_t i, michael@0: LoadCommandType type); michael@0: michael@0: // The contents of the |i|'th load command, of type |type|, extend beyond michael@0: // the size given in the load command's header. michael@0: virtual void LoadCommandTooShort(size_t i, LoadCommandType type); michael@0: michael@0: // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named michael@0: // |name| is too short to hold the sections that its header says it does. michael@0: // (This more specific than LoadCommandTooShort.) michael@0: virtual void SectionsMissing(const string &name); michael@0: michael@0: // The segment named |name| claims that its contents lie beyond the end michael@0: // of the file. michael@0: virtual void MisplacedSegmentData(const string &name); michael@0: michael@0: // The section named |section| in the segment named |segment| claims that michael@0: // its contents do not lie entirely within the segment. 
michael@0: virtual void MisplacedSectionData(const string §ion, michael@0: const string &segment); michael@0: michael@0: // The LC_SYMTAB command claims that symbol table contents are located michael@0: // beyond the end of the file. michael@0: virtual void MisplacedSymbolTable(); michael@0: michael@0: // An attempt was made to read a Mach-O file of the unsupported michael@0: // CPU architecture |cpu_type|. michael@0: virtual void UnsupportedCPUType(cpu_type_t cpu_type); michael@0: michael@0: private: michael@0: string filename_; michael@0: }; michael@0: michael@0: // A handler for sections parsed from a segment. The WalkSegmentSections michael@0: // member function accepts an instance of this class, and applies it to michael@0: // each section defined in a given segment. michael@0: class SectionHandler { michael@0: public: michael@0: virtual ~SectionHandler() { } michael@0: michael@0: // Called to report that the segment's section list contains |section|. michael@0: // This should return true if the iteration should continue, or false michael@0: // if it should stop. michael@0: virtual bool HandleSection(const Section §ion) = 0; michael@0: }; michael@0: michael@0: // A handler for the load commands in a Mach-O file. michael@0: class LoadCommandHandler { michael@0: public: michael@0: LoadCommandHandler() { } michael@0: virtual ~LoadCommandHandler() { } michael@0: michael@0: // When called from WalkLoadCommands, the following handler functions michael@0: // should return true if they wish to continue iterating over the load michael@0: // command list, or false if they wish to stop iterating. michael@0: // michael@0: // When called from LoadCommandIterator::Handle or Reader::Handle, michael@0: // these functions' return values are simply passed through to Handle's michael@0: // caller. 
michael@0: // michael@0: // The definitions provided by this base class simply return true; the michael@0: // default is to silently ignore sections whose member functions the michael@0: // subclass doesn't override. michael@0: michael@0: // COMMAND is load command we don't recognize. We provide only the michael@0: // command type and a ByteBuffer enclosing the command's data (If we michael@0: // cannot parse the command type or its size, we call michael@0: // reporter_->IncompleteLoadCommand instead.) michael@0: virtual bool UnknownCommand(LoadCommandType type, michael@0: const ByteBuffer &contents) { michael@0: return true; michael@0: } michael@0: michael@0: // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment michael@0: // with the properties given in |segment|. michael@0: virtual bool SegmentCommand(const Segment &segment) { michael@0: return true; michael@0: } michael@0: michael@0: // The load command is LC_SYMTAB. |entries| holds the array of nlist michael@0: // entries, and |names| holds the strings the entries refer to. michael@0: virtual bool SymtabCommand(const ByteBuffer &entries, michael@0: const ByteBuffer &names) { michael@0: return true; michael@0: } michael@0: michael@0: // Add handler functions for more load commands here as needed. michael@0: }; michael@0: michael@0: // Create a Mach-O file reader that reports problems to |reporter|. michael@0: explicit Reader(Reporter *reporter) michael@0: : reporter_(reporter) { } michael@0: michael@0: // Read the given data as a Mach-O file. The reader retains pointers michael@0: // into the data passed, so the data should live as long as the reader michael@0: // does. On success, return true; on failure, return false. michael@0: // michael@0: // At most one of these functions should be invoked once on each Reader michael@0: // instance. 
michael@0: bool Read(const uint8_t *buffer, michael@0: size_t size, michael@0: cpu_type_t expected_cpu_type, michael@0: cpu_subtype_t expected_cpu_subtype); michael@0: bool Read(const ByteBuffer &buffer, michael@0: cpu_type_t expected_cpu_type, michael@0: cpu_subtype_t expected_cpu_subtype) { michael@0: return Read(buffer.start, michael@0: buffer.Size(), michael@0: expected_cpu_type, michael@0: expected_cpu_subtype); michael@0: } michael@0: michael@0: // Return this file's characteristics, as found in the Mach-O header. michael@0: cpu_type_t cpu_type() const { return cpu_type_; } michael@0: cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } michael@0: FileType file_type() const { return file_type_; } michael@0: FileFlags flags() const { return flags_; } michael@0: michael@0: // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit michael@0: // Mach-O file. michael@0: bool bits_64() const { return bits_64_; } michael@0: michael@0: // Return true if this is a big-endian Mach-O file, false if it is michael@0: // little-endian. michael@0: bool big_endian() const { return big_endian_; } michael@0: michael@0: // Apply |handler| to each load command in this Mach-O file, stopping when michael@0: // a handler function returns false. If we encounter a malformed load michael@0: // command, report it via reporter_ and return false. Return true if all michael@0: // load commands were parseable and all handlers returned true. michael@0: bool WalkLoadCommands(LoadCommandHandler *handler) const; michael@0: michael@0: // Set |segment| to describe the segment named |name|, if present. If michael@0: // found, |segment|'s byte buffers refer to a subregion of the bytes michael@0: // passed to Read. If we find the section, return true; otherwise, michael@0: // return false. michael@0: bool FindSegment(const string &name, Segment *segment) const; michael@0: michael@0: // Apply |handler| to each section defined in |segment|. 
If |handler| returns michael@0: // false, stop iterating and return false. If all calls to |handler| return michael@0: // true and we reach the end of the section list, return true. michael@0: bool WalkSegmentSections(const Segment &segment, SectionHandler *handler) michael@0: const; michael@0: michael@0: // Clear |section_map| and then populate it with a map of the sections michael@0: // in |segment|, from section names to Section structures. michael@0: // Each Section's contents refer to bytes in |segment|'s contents. michael@0: // On success, return true; if a problem occurs, report it and return false. michael@0: bool MapSegmentSections(const Segment &segment, SectionMap *section_map) michael@0: const; michael@0: michael@0: private: michael@0: // Used internally. michael@0: class SegmentFinder; michael@0: class SectionMapper; michael@0: michael@0: // We use this to report problems parsing the file's contents. (WEAK) michael@0: Reporter *reporter_; michael@0: michael@0: // The contents of the Mach-O file we're parsing. We do not own the michael@0: // storage it refers to. michael@0: ByteBuffer buffer_; michael@0: michael@0: // True if this file is big-endian. michael@0: bool big_endian_; michael@0: michael@0: // True if this file is a 64-bit Mach-O file. michael@0: bool bits_64_; michael@0: michael@0: // This file's cpu type and subtype. michael@0: cpu_type_t cpu_type_; // mach_header[_64].cputype michael@0: cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype michael@0: michael@0: // This file's type. michael@0: FileType file_type_; // mach_header[_64].filetype michael@0: michael@0: // The region of buffer_ occupied by load commands. michael@0: ByteBuffer load_commands_; michael@0: michael@0: // The number of load commands in load_commands_. michael@0: uint32_t load_command_count_; // mach_header[_64].ncmds michael@0: michael@0: // This file's header flags. 
michael@0: FileFlags flags_; michael@0: }; michael@0: michael@0: } // namespace mach_o michael@0: } // namespace google_breakpad michael@0: michael@0: #endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_