toolkit/crashreporter/google-breakpad/src/common/mac/macho_reader.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/mac/macho_reader.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,459 @@
     1.4 +// -*- mode: C++ -*-
     1.5 +
     1.6 +// Copyright (c) 2010, Google Inc.
     1.7 +// All rights reserved.
     1.8 +//
     1.9 +// Redistribution and use in source and binary forms, with or without
    1.10 +// modification, are permitted provided that the following conditions are
    1.11 +// met:
    1.12 +//
    1.13 +//     * Redistributions of source code must retain the above copyright
    1.14 +// notice, this list of conditions and the following disclaimer.
    1.15 +//     * Redistributions in binary form must reproduce the above
    1.16 +// copyright notice, this list of conditions and the following disclaimer
    1.17 +// in the documentation and/or other materials provided with the
    1.18 +// distribution.
    1.19 +//     * Neither the name of Google Inc. nor the names of its
    1.20 +// contributors may be used to endorse or promote products derived from
    1.21 +// this software without specific prior written permission.
    1.22 +//
    1.23 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.24 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.25 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.26 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    1.27 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    1.28 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    1.29 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    1.30 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    1.31 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.32 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.33 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.34 +
    1.35 +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
    1.36 +
    1.37 +// macho_reader.h: A class for parsing Mach-O files.
    1.38 +
    1.39 +#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
    1.40 +#define BREAKPAD_COMMON_MAC_MACHO_READER_H_
    1.41 +
    1.42 +#include <mach-o/loader.h>
    1.43 +#include <mach-o/fat.h>
    1.44 +#include <stdint.h>
    1.45 +#include <stdlib.h>
    1.46 +#include <unistd.h>
    1.47 +
    1.48 +#include <map>
    1.49 +#include <string>
    1.50 +#include <vector>
    1.51 +
    1.52 +#include "common/byte_cursor.h"
    1.53 +
    1.54 +namespace google_breakpad {
    1.55 +namespace mach_o {
    1.56 +
    1.57 +using std::map;
    1.58 +using std::string;
    1.59 +using std::vector;
    1.60 +
    1.61 +// The Mac headers don't specify particular types for these groups of
    1.62 +// constants, but defining them here provides some documentation
    1.63 +// value.  We also give them the same width as the fields in which
    1.64 +// they appear, which makes them a bit easier to use with ByteCursors.
    1.65 +typedef uint32_t Magic;            // A fat binary or Mach-O magic number.
    1.66 +typedef uint32_t FileType;         // mach_header[_64].filetype
    1.67 +typedef uint32_t FileFlags;        // Flags from the Mach-O header.
    1.68 +typedef uint32_t LoadCommandType;  // The type of a load command.
    1.69 +typedef uint32_t SegmentFlags;     // Flags describing a segment.
    1.70 +typedef uint32_t SectionFlags;     // Flags describing a section's contents.
    1.71 +
    1.72 +// A parser for fat binary files, used to store universal binaries.
    1.73 +// When applied to a (non-fat) Mach-O file, this behaves as if the
    1.74 +// file were a fat file containing a single object file.
    1.75 +class FatReader {
    1.76 + public:
    1.77 +
    1.78 +  // A class for reporting errors found while parsing fat binary files. The
    1.79 +  // default definitions of these methods print messages to stderr.
    1.80 +  class Reporter {
    1.81 +   public:
    1.82 +    // Create a reporter that attributes problems to |filename|.
    1.83 +    explicit Reporter(const string &filename) : filename_(filename) { }
    1.84 +
    1.85 +    virtual ~Reporter() { }
    1.86 +
    1.87 +    // The data does not begin with a fat binary or Mach-O magic number.
    1.88 +    // This is a fatal error.
    1.89 +    virtual void BadHeader();
    1.90 +
    1.91 +    // The Mach-O fat binary file ends abruptly, without enough space
    1.92 +    // to contain an object file it claims is present.
    1.93 +    virtual void MisplacedObjectFile();
    1.94 +
    1.95 +    // The file ends abruptly: either it is not large enough to hold a
    1.96 +    // complete header, or the header implies that contents are present
    1.97 +    // beyond the actual end of the file.
    1.98 +    virtual void TooShort();
    1.99 +  
   1.100 +   private:
   1.101 +    // The filename to which the reader should attribute problems.
   1.102 +    string filename_;
   1.103 +  };
   1.104 +
   1.105 +  // Create a fat binary file reader that uses |reporter| to report problems.
   1.106 +  explicit FatReader(Reporter *reporter) : reporter_(reporter) { }
   1.107 +  
   1.108 +  // Read the |size| bytes at |buffer| as a fat binary file. On success,
   1.109 +  // return true; on failure, report the problem to reporter_ and return
   1.110 +  // false.
   1.111 +  //
   1.112 +  // If the data is a plain Mach-O file, rather than a fat binary file,
   1.113 +  // then the reader behaves as if it had found a fat binary file whose
   1.114 +  // single object file is the Mach-O file.
   1.115 +  bool Read(const uint8_t *buffer, size_t size);
   1.116 +
   1.117 +  // Return an array of 'struct fat_arch' structures describing the
   1.118 +  // object files present in this fat binary file. Set |*count| to the
   1.119 +  // number of elements in the array.
   1.120 +  //
   1.121 +  // Assuming Read returned true, the entries are validated: it is
   1.122 +  // safe to assume that the offsets and sizes in each 'struct
   1.123 +  // fat_arch' refer to subranges of the bytes passed to Read.
   1.124 +  //
   1.125 +  // If there are no object files in this fat binary, then this
   1.126 +  // function returns NULL (and sets |*count| to zero).
   1.127 +  //
   1.128 +  // The array is owned by this FatReader instance; it will be freed when
   1.129 +  // this FatReader is destroyed.
   1.130 +  //
   1.131 +  // This function returns a C-style array instead of a vector to make it
   1.132 +  // possible to use the result with OS X functions like NXFindBestFatArch,
   1.133 +  // so that the symbol dumper will behave consistently with other OS X
   1.134 +  // utilities that work with fat binaries.
   1.135 +  const struct fat_arch *object_files(size_t *count) const { 
   1.136 +    *count = object_files_.size();
   1.137 +    if (object_files_.size() > 0)
   1.138 +      return &object_files_[0];
   1.139 +    return NULL;
   1.140 +  }
   1.141 +
   1.142 + private:
   1.143 +  // We use this to report problems parsing the file's contents. (WEAK)
   1.144 +  Reporter *reporter_;
   1.145 +
   1.146 +  // The contents of the fat binary or Mach-O file we're parsing. We do not
   1.147 +  // own the storage it refers to.
   1.148 +  ByteBuffer buffer_;
   1.149 +
   1.150 +  // The magic number of this binary, in host byte order. Not initialized by the constructor.
   1.151 +  Magic magic_;
   1.152 +
   1.153 +  // The list of object files in this binary.
   1.154 +  // object_files_.size() == fat_header.nfat_arch
   1.155 +  vector<struct fat_arch> object_files_;
   1.156 +};
   1.157 +
   1.158 +// A segment in a Mach-O file. All these fields have been byte-swapped as
   1.159 +// appropriate for use by the executing architecture.
   1.160 +struct Segment {
   1.161 +  // The ByteBuffers below point into the bytes passed to the Reader that
   1.162 +  // created this Segment.
   1.163 +
   1.164 +  ByteBuffer section_list;    // This segment's section list.
   1.165 +  ByteBuffer contents;        // This segment's contents.
   1.166 +
   1.167 +  // This segment's name.
   1.168 +  string name;
   1.169 +
   1.170 +  // The address at which this segment should be loaded in memory. If
   1.171 +  // bits_64 is false, only the bottom 32 bits of this value are valid.
   1.172 +  uint64_t vmaddr;
   1.173 +
   1.174 +  // The size of this segment when loaded into memory. This may be larger
   1.175 +  // than contents.Size(), in which case the extra area will be
   1.176 +  // initialized with zeros. If bits_64 is false, only the bottom 32 bits
   1.177 +  // of this value are valid.
   1.178 +  uint64_t vmsize;
   1.179 +
   1.180 +  // The maximum and initial VM protection of this segment's contents.
   1.181 +  uint32_t maxprot;
   1.182 +  uint32_t initprot;
   1.183 +  
   1.184 +  // The number of sections in section_list.
   1.185 +  uint32_t nsects;
   1.186 +
   1.187 +  // Flags describing this segment, from SegmentFlags.
   1.188 +  uint32_t flags;
   1.189 +
   1.190 +  // True if this is a 64-bit segment; false if it is a 32-bit segment.
   1.191 +  bool bits_64;
   1.192 +};
   1.193 +
   1.194 +// A section in a Mach-O file. All these fields have been byte-swapped as
   1.195 +// appropriate for use by the executing architecture.
   1.196 +struct Section {
   1.197 +  // This section's contents. This points into the bytes passed to the
   1.198 +  // Reader that created this Section.
   1.199 +  ByteBuffer contents;
   1.200 +
   1.201 +  // This section's name.
   1.202 +  string section_name;  // section[_64].sectname
   1.203 +  // The name of the segment this section belongs to.
   1.204 +  string segment_name;  // section[_64].segname
   1.205 +
   1.206 +  // The address at which this section's contents should be loaded in
   1.207 +  // memory. If bits_64 is false, only the bottom 32 bits of this value
   1.208 +  // are valid.
   1.209 +  uint64_t address;
   1.210 +
   1.211 +  // The contents of this section should be loaded into memory at an
   1.212 +  // address which is a multiple of (two raised to this power).
   1.213 +  uint32_t align;
   1.214 +
   1.215 +  // Flags from SectionFlags describing the section's contents.
   1.216 +  uint32_t flags;
   1.217 +
   1.218 +  // Relocations are not represented here; we don't support reading them yet.
   1.219 +
   1.220 +  // True if this is a 64-bit section; false if it is a 32-bit section.
   1.221 +  bool bits_64;
   1.222 +};
   1.223 +
   1.224 +// A map from section names to Sections; see Reader::MapSegmentSections.
   1.225 +typedef map<string, Section> SectionMap;
   1.226 +
   1.227 +// A reader for a Mach-O file.
   1.228 +//
   1.229 +// This does not handle fat binaries; see FatReader above. FatReader
   1.230 +// provides a friendly interface for parsing data that could be either a
   1.231 +// fat binary or a Mach-O file.
   1.232 +class Reader {
   1.233 + public:
   1.234 +
   1.235 +  // A class for reporting errors found while parsing Mach-O files. The
   1.236 +  // default definitions of these member functions print messages to
   1.237 +  // stderr.
   1.238 +  class Reporter {
   1.239 +   public:
   1.240 +    // Create a reporter that attributes problems to |filename|.
   1.241 +    explicit Reporter(const string &filename) : filename_(filename) { }
   1.242 +    virtual ~Reporter() { }
   1.243 +
   1.244 +    // Reporter functions for fatal errors return void; the reader will
   1.245 +    // definitely return an error to its caller after calling them.
   1.246 +
   1.247 +    // The data does not begin with a Mach-O magic number, or the magic
   1.248 +    // number does not match the expected value for the cpu architecture.
   1.249 +    // This is a fatal error.
   1.250 +    virtual void BadHeader();
   1.251 +
   1.252 +    // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
   1.253 +    // does not match the expected CPU architecture
   1.254 +    // (|expected_cpu_type|, |expected_cpu_subtype|).
   1.255 +    virtual void CPUTypeMismatch(cpu_type_t cpu_type,
   1.256 +                                 cpu_subtype_t cpu_subtype,
   1.257 +                                 cpu_type_t expected_cpu_type,
   1.258 +                                 cpu_subtype_t expected_cpu_subtype);
   1.259 +
   1.260 +    // The file ends abruptly: either it is not large enough to hold a
   1.261 +    // complete header, or the header implies that contents are present
   1.262 +    // beyond the actual end of the file.
   1.263 +    virtual void HeaderTruncated();
   1.264 +
   1.265 +    // The file's load command region, as given in the Mach-O header, is
   1.266 +    // too large for the file.
   1.267 +    virtual void LoadCommandRegionTruncated();
   1.268 +
   1.269 +    // The file's Mach-O header claims the file contains |claimed| load
   1.270 +    // commands, but the |i|'th load command, of type |type|, extends beyond
   1.271 +    // the end of the load command region, as given by the Mach-O header.
   1.272 +    // If |type| is zero, the command's type was unreadable.
   1.273 +    virtual void LoadCommandsOverrun(size_t claimed, size_t i,
   1.274 +                                     LoadCommandType type);
   1.275 +
   1.276 +    // The contents of the |i|'th load command, of type |type|, extend beyond
   1.277 +    // the size given in the load command's header.
   1.278 +    virtual void LoadCommandTooShort(size_t i, LoadCommandType type);
   1.279 +
   1.280 +    // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
   1.281 +    // |name| is too short to hold the sections that its header says it does.
   1.282 +    // (This is more specific than LoadCommandTooShort.)
   1.283 +    virtual void SectionsMissing(const string &name);
   1.284 +
   1.285 +    // The segment named |name| claims that its contents lie beyond the end
   1.286 +    // of the file.
   1.287 +    virtual void MisplacedSegmentData(const string &name);
   1.288 +
   1.289 +    // The section named |section| in the segment named |segment| claims that
   1.290 +    // its contents do not lie entirely within the segment.
   1.291 +    virtual void MisplacedSectionData(const string &section,
   1.292 +                                      const string &segment);
   1.293 +
   1.294 +    // The LC_SYMTAB command claims that symbol table contents are located
   1.295 +    // beyond the end of the file.
   1.296 +    virtual void MisplacedSymbolTable();
   1.297 +
   1.298 +    // An attempt was made to read a Mach-O file of the unsupported
   1.299 +    // CPU architecture |cpu_type|.
   1.300 +    virtual void UnsupportedCPUType(cpu_type_t cpu_type);
   1.301 +
   1.302 +   private:
   1.303 +    string filename_;  // The filename to which problems are attributed.
   1.304 +  };
   1.305 +
   1.306 +  // A handler for sections parsed from a segment. The WalkSegmentSections
   1.307 +  // member function accepts an instance of this class, and applies it to
   1.308 +  // each section defined in a given segment.
   1.309 +  class SectionHandler {
   1.310 +   public:
   1.311 +    virtual ~SectionHandler() { }
   1.312 +
   1.313 +    // Called to report that the segment's section list contains |section|.
   1.314 +    // This should return true if the iteration should continue, or false
   1.315 +    // if it should stop.
   1.316 +    virtual bool HandleSection(const Section &section) = 0;
   1.317 +  };
   1.318 +
   1.319 +  // A handler for the load commands in a Mach-O file.
   1.320 +  class LoadCommandHandler {
   1.321 +   public:
   1.322 +    LoadCommandHandler() { }
   1.323 +    virtual ~LoadCommandHandler() { }
   1.324 +
   1.325 +    // When called from WalkLoadCommands, the following handler functions
   1.326 +    // should return true if they wish to continue iterating over the load
   1.327 +    // command list, or false if they wish to stop iterating.
   1.328 +    //
   1.329 +    // When called from LoadCommandIterator::Handle or Reader::Handle,
   1.330 +    // these functions' return values are simply passed through to Handle's
   1.331 +    // caller.
   1.332 +    //
   1.333 +    // The definitions provided by this base class simply return true; the
   1.334 +    // default is to silently ignore load commands whose handler functions
   1.335 +    // the subclass doesn't override.
   1.336 +
   1.337 +    // |type| is a load command type we don't recognize. We provide only
   1.338 +    // the command type and a ByteBuffer enclosing the command's data. (If
   1.339 +    // we cannot parse the command type or its size, the problem goes to
   1.340 +    // reporter_ instead; NOTE(review): presumably via LoadCommandsOverrun.)
   1.341 +    virtual bool UnknownCommand(LoadCommandType type,
   1.342 +                                const ByteBuffer &contents) {
   1.343 +      return true;
   1.344 +    }
   1.345 +
   1.346 +    // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
   1.347 +    // with the properties given in |segment|.
   1.348 +    virtual bool SegmentCommand(const Segment &segment) {
   1.349 +      return true;
   1.350 +    }
   1.351 +
   1.352 +    // The load command is LC_SYMTAB. |entries| holds the array of nlist
   1.353 +    // entries, and |names| holds the strings the entries refer to.
   1.354 +    virtual bool SymtabCommand(const ByteBuffer &entries,
   1.355 +                               const ByteBuffer &names) {
   1.356 +      return true;
   1.357 +    }
   1.358 +
   1.359 +    // Add handler functions for more load commands here as needed.
   1.360 +  };
   1.361 +
   1.362 +  // Create a Mach-O file reader that reports problems to |reporter|.
   1.363 +  explicit Reader(Reporter *reporter)
   1.364 +      : reporter_(reporter) { }  // Other members are not initialized here.
   1.365 +
   1.366 +  // Read the given data as a Mach-O file. The reader retains pointers
   1.367 +  // into the data passed, so the data should live as long as the reader
   1.368 +  // does. On success, return true; on failure, return false.
   1.369 +  //
   1.370 +  // At most one of these functions should be invoked, and only once,
   1.371 +  // on each Reader instance.
   1.372 +  bool Read(const uint8_t *buffer,
   1.373 +            size_t size,
   1.374 +            cpu_type_t expected_cpu_type,
   1.375 +            cpu_subtype_t expected_cpu_subtype);
   1.376 +  bool Read(const ByteBuffer &buffer,
   1.377 +            cpu_type_t expected_cpu_type,
   1.378 +            cpu_subtype_t expected_cpu_subtype) {
   1.379 +    return Read(buffer.start,
   1.380 +                buffer.Size(),
   1.381 +                expected_cpu_type,
   1.382 +                expected_cpu_subtype); 
   1.383 +  }
   1.384 +
   1.385 +  // Return this file's characteristics, as found in the Mach-O header.
   1.386 +  cpu_type_t    cpu_type()    const { return cpu_type_; }
   1.387 +  cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
   1.388 +  FileType      file_type()   const { return file_type_; }
   1.389 +  FileFlags     flags()       const { return flags_; }
   1.390 +
   1.391 +  // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
   1.392 +  // Mach-O file.
   1.393 +  bool bits_64() const { return bits_64_; }
   1.394 +
   1.395 +  // Return true if this is a big-endian Mach-O file, false if it is
   1.396 +  // little-endian.
   1.397 +  bool big_endian() const { return big_endian_; }
   1.398 +
   1.399 +  // Apply |handler| to each load command in this Mach-O file, stopping when
   1.400 +  // a handler function returns false. If we encounter a malformed load
   1.401 +  // command, report it via reporter_ and return false. Return true if all
   1.402 +  // load commands were parseable and all handlers returned true.
   1.403 +  bool WalkLoadCommands(LoadCommandHandler *handler) const;
   1.404 +
   1.405 +  // Set |segment| to describe the segment named |name|, if present. If
   1.406 +  // found, |segment|'s byte buffers refer to a subregion of the bytes
   1.407 +  // passed to Read. If we find the segment, return true; otherwise,
   1.408 +  // return false.
   1.409 +  bool FindSegment(const string &name, Segment *segment) const;
   1.410 +
   1.411 +  // Apply |handler| to each section defined in |segment|. If |handler| returns
   1.412 +  // false, stop iterating and return false. If all calls to |handler| return
   1.413 +  // true and we reach the end of the section list, return true.
   1.414 +  bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
   1.415 +    const;
   1.416 +
   1.417 +  // Clear |section_map| and then populate it with a map of the sections
   1.418 +  // in |segment|, from section names to Section structures.
   1.419 +  // Each Section's contents refer to bytes in |segment|'s contents.
   1.420 +  // On success, return true; if a problem occurs, report it and return false.
   1.421 +  bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
   1.422 +    const;
   1.423 +
   1.424 + private:
   1.425 +  // Used internally.
   1.426 +  class SegmentFinder;
   1.427 +  class SectionMapper;
   1.428 +
   1.429 +  // We use this to report problems parsing the file's contents. (WEAK)
   1.430 +  Reporter *reporter_;
   1.431 +
   1.432 +  // The contents of the Mach-O file we're parsing. We do not own the
   1.433 +  // storage it refers to.
   1.434 +  ByteBuffer buffer_;
   1.435 +
   1.436 +  // True if this file is big-endian.
   1.437 +  bool big_endian_;
   1.438 +
   1.439 +  // True if this file is a 64-bit Mach-O file.
   1.440 +  bool bits_64_;
   1.441 +
   1.442 +  // This file's cpu type and subtype.
   1.443 +  cpu_type_t cpu_type_;        // mach_header[_64].cputype
   1.444 +  cpu_subtype_t cpu_subtype_;  // mach_header[_64].cpusubtype
   1.445 +
   1.446 +  // This file's type.
   1.447 +  FileType file_type_;         // mach_header[_64].filetype
   1.448 +
   1.449 +  // The region of buffer_ occupied by load commands.
   1.450 +  ByteBuffer load_commands_;
   1.451 +
   1.452 +  // The number of load commands in load_commands_.
   1.453 +  uint32_t load_command_count_;  // mach_header[_64].ncmds
   1.454 +
   1.455 +  // This file's header flags.
   1.456 +  FileFlags flags_;              // mach_header[_64].flags
   1.457 +};
   1.458 +
   1.459 +}  // namespace mach_o
   1.460 +}  // namespace google_breakpad
   1.461 +
   1.462 +#endif  // BREAKPAD_COMMON_MAC_MACHO_READER_H_

mercurial