1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/mac/macho_reader.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,459 @@ 1.4 +// -*- mode: C++ -*- 1.5 + 1.6 +// Copyright (c) 2010, Google Inc. 1.7 +// All rights reserved. 1.8 +// 1.9 +// Redistribution and use in source and binary forms, with or without 1.10 +// modification, are permitted provided that the following conditions are 1.11 +// met: 1.12 +// 1.13 +// * Redistributions of source code must retain the above copyright 1.14 +// notice, this list of conditions and the following disclaimer. 1.15 +// * Redistributions in binary form must reproduce the above 1.16 +// copyright notice, this list of conditions and the following disclaimer 1.17 +// in the documentation and/or other materials provided with the 1.18 +// distribution. 1.19 +// * Neither the name of Google Inc. nor the names of its 1.20 +// contributors may be used to endorse or promote products derived from 1.21 +// this software without specific prior written permission. 1.22 +// 1.23 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.24 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.25 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.26 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.27 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.28 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.29 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.30 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.31 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.32 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.33 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.34 + 1.35 +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.36 + 1.37 +// macho_reader.h: A class for parsing Mach-O files. 1.38 + 1.39 +#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ 1.40 +#define BREAKPAD_COMMON_MAC_MACHO_READER_H_ 1.41 + 1.42 +#include <mach-o/loader.h> 1.43 +#include <mach-o/fat.h> 1.44 +#include <stdint.h> 1.45 +#include <stdlib.h> 1.46 +#include <unistd.h> 1.47 + 1.48 +#include <map> 1.49 +#include <string> 1.50 +#include <vector> 1.51 + 1.52 +#include "common/byte_cursor.h" 1.53 + 1.54 +namespace google_breakpad { 1.55 +namespace mach_o { 1.56 + 1.57 +using std::map; 1.58 +using std::string; 1.59 +using std::vector; 1.60 + 1.61 +// The Mac headers don't specify particular types for these groups of 1.62 +// constants, but defining them here provides some documentation 1.63 +// value. We also give them the same width as the fields in which 1.64 +// they appear, which makes them a bit easier to use with ByteCursors. 1.65 +typedef uint32_t Magic; 1.66 +typedef uint32_t FileType; 1.67 +typedef uint32_t FileFlags; 1.68 +typedef uint32_t LoadCommandType; 1.69 +typedef uint32_t SegmentFlags; 1.70 +typedef uint32_t SectionFlags; 1.71 + 1.72 +// A parser for fat binary files, used to store universal binaries. 1.73 +// When applied to a (non-fat) Mach-O file, this behaves as if the 1.74 +// file were a fat file containing a single object file. 1.75 +class FatReader { 1.76 + public: 1.77 + 1.78 + // A class for reporting errors found while parsing fat binary files. The 1.79 + // default definitions of these methods print messages to stderr. 1.80 + class Reporter { 1.81 + public: 1.82 + // Create a reporter that attributes problems to |filename|. 1.83 + explicit Reporter(const string &filename) : filename_(filename) { } 1.84 + 1.85 + virtual ~Reporter() { } 1.86 + 1.87 + // The data does not begin with a fat binary or Mach-O magic number. 1.88 + // This is a fatal error. 1.89 + virtual void BadHeader(); 1.90 + 1.91 + // The Mach-O fat binary file ends abruptly, without enough space 1.92 + // to contain an object file it claims is present. 1.93 + virtual void MisplacedObjectFile(); 1.94 + 1.95 + // The file ends abruptly: either it is not large enough to hold a 1.96 + // complete header, or the header implies that contents are present 1.97 + // beyond the actual end of the file. 1.98 + virtual void TooShort(); 1.99 + 1.100 + private: 1.101 + // The filename to which the reader should attribute problems. 1.102 + string filename_; 1.103 + }; 1.104 + 1.105 + // Create a fat binary file reader that uses |reporter| to report problems. 1.106 + explicit FatReader(Reporter *reporter) : reporter_(reporter) { } 1.107 + 1.108 + // Read the |size| bytes at |buffer| as a fat binary file. On success, 1.109 + // return true; on failure, report the problem to reporter_ and return 1.110 + // false. 1.111 + // 1.112 + // If the data is a plain Mach-O file, rather than a fat binary file, 1.113 + // then the reader behaves as if it had found a fat binary file whose 1.114 + // single object file is the Mach-O file. 1.115 + bool Read(const uint8_t *buffer, size_t size); 1.116 + 1.117 + // Return an array of 'struct fat_arch' structures describing the 1.118 + // object files present in this fat binary file. Set |size| to the 1.119 + // number of elements in the array. 1.120 + // 1.121 + // Assuming Read returned true, the entries are validated: it is 1.122 + // safe to assume that the offsets and sizes in each 'struct 1.123 + // fat_arch' refer to subranges of the bytes passed to Read. 1.124 + // 1.125 + // If there are no object files in this fat binary, then this 1.126 + // function can return NULL. 1.127 + // 1.128 + // The array is owned by this FatReader instance; it will be freed when 1.129 + // this FatReader is destroyed. 1.130 + // 1.131 + // This function returns a C-style array instead of a vector to make it 1.132 + // possible to use the result with OS X functions like NXFindBestFatArch, 1.133 + // so that the symbol dumper will behave consistently with other OS X 1.134 + // utilities that work with fat binaries. 1.135 + const struct fat_arch *object_files(size_t *count) const { 1.136 + *count = object_files_.size(); 1.137 + if (object_files_.size() > 0) 1.138 + return &object_files_[0]; 1.139 + return NULL; 1.140 + } 1.141 + 1.142 + private: 1.143 + // We use this to report problems parsing the file's contents. (WEAK) 1.144 + Reporter *reporter_; 1.145 + 1.146 + // The contents of the fat binary or Mach-O file we're parsing. We do not 1.147 + // own the storage it refers to. 1.148 + ByteBuffer buffer_; 1.149 + 1.150 + // The magic number of this binary, in host byte order. 1.151 + Magic magic_; 1.152 + 1.153 + // The list of object files in this binary. 1.154 + // object_files_.size() == fat_header.nfat_arch 1.155 + vector<struct fat_arch> object_files_; 1.156 +}; 1.157 + 1.158 +// A segment in a Mach-O file. All these fields have been byte-swapped as 1.159 +// appropriate for use by the executing architecture. 1.160 +struct Segment { 1.161 + // The ByteBuffers below point into the bytes passed to the Reader that 1.162 + // created this Segment. 1.163 + 1.164 + ByteBuffer section_list; // This segment's section list. 1.165 + ByteBuffer contents; // This segment's contents. 1.166 + 1.167 + // This segment's name. 1.168 + string name; 1.169 + 1.170 + // The address at which this segment should be loaded in memory. If 1.171 + // bits_64 is false, only the bottom 32 bits of this value are valid. 1.172 + uint64_t vmaddr; 1.173 + 1.174 + // The size of this segment when loaded into memory. This may be larger 1.175 + // than contents.Size(), in which case the extra area will be 1.176 + // initialized with zeros. If bits_64 is false, only the bottom 32 bits 1.177 + // of this value are valid. 1.178 + uint64_t vmsize; 1.179 + 1.180 + // The maximum and initial VM protection of this segment's contents. 1.181 + uint32_t maxprot; 1.182 + uint32_t initprot; 1.183 + 1.184 + // The number of sections in section_list. 1.185 + uint32_t nsects; 1.186 + 1.187 + // Flags describing this segment, from SegmentFlags. 1.188 + uint32_t flags; 1.189 + 1.190 + // True if this is a 64-bit section; false if it is a 32-bit section. 1.191 + bool bits_64; 1.192 +}; 1.193 + 1.194 +// A section in a Mach-O file. All these fields have been byte-swapped as 1.195 +// appropriate for use by the executing architecture. 1.196 +struct Section { 1.197 + // This section's contents. This points into the bytes passed to the 1.198 + // Reader that created this Section. 1.199 + ByteBuffer contents; 1.200 + 1.201 + // This section's name. 1.202 + string section_name; // section[_64].sectname 1.203 + // The name of the segment this section belongs to. 1.204 + string segment_name; // section[_64].segname 1.205 + 1.206 + // The address at which this section's contents should be loaded in 1.207 + // memory. If bits_64 is false, only the bottom 32 bits of this value 1.208 + // are valid. 1.209 + uint64_t address; 1.210 + 1.211 + // The contents of this section should be loaded into memory at an 1.212 + // address which is a multiple of (two raised to this power). 1.213 + uint32_t align; 1.214 + 1.215 + // Flags from SectionFlags describing the section's contents. 1.216 + uint32_t flags; 1.217 + 1.218 + // We don't support reading relocations yet. 1.219 + 1.220 + // True if this is a 64-bit section; false if it is a 32-bit section. 1.221 + bool bits_64; 1.222 +}; 1.223 + 1.224 +// A map from section names to Sections. 1.225 +typedef map<string, Section> SectionMap; 1.226 + 1.227 +// A reader for a Mach-O file. 1.228 +// 1.229 +// This does not handle fat binaries; see FatReader above. FatReader 1.230 +// provides a friendly interface for parsing data that could be either a 1.231 +// fat binary or a Mach-O file. 1.232 +class Reader { 1.233 + public: 1.234 + 1.235 + // A class for reporting errors found while parsing Mach-O files. The 1.236 + // default definitions of these member functions print messages to 1.237 + // stderr. 1.238 + class Reporter { 1.239 + public: 1.240 + // Create a reporter that attributes problems to |filename|. 1.241 + explicit Reporter(const string &filename) : filename_(filename) { } 1.242 + virtual ~Reporter() { } 1.243 + 1.244 + // Reporter functions for fatal errors return void; the reader will 1.245 + // definitely return an error to its caller after calling them 1.246 + 1.247 + // The data does not begin with a Mach-O magic number, or the magic 1.248 + // number does not match the expected value for the cpu architecture. 1.249 + // This is a fatal error. 1.250 + virtual void BadHeader(); 1.251 + 1.252 + // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) 1.253 + // does not match the expected CPU architecture 1.254 + // (|expected_cpu_type|, |expected_cpu_subtype|). 1.255 + virtual void CPUTypeMismatch(cpu_type_t cpu_type, 1.256 + cpu_subtype_t cpu_subtype, 1.257 + cpu_type_t expected_cpu_type, 1.258 + cpu_subtype_t expected_cpu_subtype); 1.259 + 1.260 + // The file ends abruptly: either it is not large enough to hold a 1.261 + // complete header, or the header implies that contents are present 1.262 + // beyond the actual end of the file. 1.263 + virtual void HeaderTruncated(); 1.264 + 1.265 + // The file's load command region, as given in the Mach-O header, is 1.266 + // too large for the file. 1.267 + virtual void LoadCommandRegionTruncated(); 1.268 + 1.269 + // The file's Mach-O header claims the file contains |claimed| load 1.270 + // commands, but the I'th load command, of type |type|, extends beyond 1.271 + // the end of the load command region, as given by the Mach-O header. 1.272 + // If |type| is zero, the command's type was unreadable. 1.273 + virtual void LoadCommandsOverrun(size_t claimed, size_t i, 1.274 + LoadCommandType type); 1.275 + 1.276 + // The contents of the |i|'th load command, of type |type|, extend beyond 1.277 + // the size given in the load command's header. 1.278 + virtual void LoadCommandTooShort(size_t i, LoadCommandType type); 1.279 + 1.280 + // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named 1.281 + // |name| is too short to hold the sections that its header says it does. 1.282 + // (This more specific than LoadCommandTooShort.) 1.283 + virtual void SectionsMissing(const string &name); 1.284 + 1.285 + // The segment named |name| claims that its contents lie beyond the end 1.286 + // of the file. 1.287 + virtual void MisplacedSegmentData(const string &name); 1.288 + 1.289 + // The section named |section| in the segment named |segment| claims that 1.290 + // its contents do not lie entirely within the segment. 1.291 + virtual void MisplacedSectionData(const string §ion, 1.292 + const string &segment); 1.293 + 1.294 + // The LC_SYMTAB command claims that symbol table contents are located 1.295 + // beyond the end of the file. 1.296 + virtual void MisplacedSymbolTable(); 1.297 + 1.298 + // An attempt was made to read a Mach-O file of the unsupported 1.299 + // CPU architecture |cpu_type|. 1.300 + virtual void UnsupportedCPUType(cpu_type_t cpu_type); 1.301 + 1.302 + private: 1.303 + string filename_; 1.304 + }; 1.305 + 1.306 + // A handler for sections parsed from a segment. The WalkSegmentSections 1.307 + // member function accepts an instance of this class, and applies it to 1.308 + // each section defined in a given segment. 1.309 + class SectionHandler { 1.310 + public: 1.311 + virtual ~SectionHandler() { } 1.312 + 1.313 + // Called to report that the segment's section list contains |section|. 1.314 + // This should return true if the iteration should continue, or false 1.315 + // if it should stop. 1.316 + virtual bool HandleSection(const Section §ion) = 0; 1.317 + }; 1.318 + 1.319 + // A handler for the load commands in a Mach-O file. 1.320 + class LoadCommandHandler { 1.321 + public: 1.322 + LoadCommandHandler() { } 1.323 + virtual ~LoadCommandHandler() { } 1.324 + 1.325 + // When called from WalkLoadCommands, the following handler functions 1.326 + // should return true if they wish to continue iterating over the load 1.327 + // command list, or false if they wish to stop iterating. 1.328 + // 1.329 + // When called from LoadCommandIterator::Handle or Reader::Handle, 1.330 + // these functions' return values are simply passed through to Handle's 1.331 + // caller. 1.332 + // 1.333 + // The definitions provided by this base class simply return true; the 1.334 + // default is to silently ignore sections whose member functions the 1.335 + // subclass doesn't override. 1.336 + 1.337 + // COMMAND is load command we don't recognize. We provide only the 1.338 + // command type and a ByteBuffer enclosing the command's data (If we 1.339 + // cannot parse the command type or its size, we call 1.340 + // reporter_->IncompleteLoadCommand instead.) 1.341 + virtual bool UnknownCommand(LoadCommandType type, 1.342 + const ByteBuffer &contents) { 1.343 + return true; 1.344 + } 1.345 + 1.346 + // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment 1.347 + // with the properties given in |segment|. 1.348 + virtual bool SegmentCommand(const Segment &segment) { 1.349 + return true; 1.350 + } 1.351 + 1.352 + // The load command is LC_SYMTAB. |entries| holds the array of nlist 1.353 + // entries, and |names| holds the strings the entries refer to. 1.354 + virtual bool SymtabCommand(const ByteBuffer &entries, 1.355 + const ByteBuffer &names) { 1.356 + return true; 1.357 + } 1.358 + 1.359 + // Add handler functions for more load commands here as needed. 1.360 + }; 1.361 + 1.362 + // Create a Mach-O file reader that reports problems to |reporter|. 1.363 + explicit Reader(Reporter *reporter) 1.364 + : reporter_(reporter) { } 1.365 + 1.366 + // Read the given data as a Mach-O file. The reader retains pointers 1.367 + // into the data passed, so the data should live as long as the reader 1.368 + // does. On success, return true; on failure, return false. 1.369 + // 1.370 + // At most one of these functions should be invoked once on each Reader 1.371 + // instance. 1.372 + bool Read(const uint8_t *buffer, 1.373 + size_t size, 1.374 + cpu_type_t expected_cpu_type, 1.375 + cpu_subtype_t expected_cpu_subtype); 1.376 + bool Read(const ByteBuffer &buffer, 1.377 + cpu_type_t expected_cpu_type, 1.378 + cpu_subtype_t expected_cpu_subtype) { 1.379 + return Read(buffer.start, 1.380 + buffer.Size(), 1.381 + expected_cpu_type, 1.382 + expected_cpu_subtype); 1.383 + } 1.384 + 1.385 + // Return this file's characteristics, as found in the Mach-O header. 1.386 + cpu_type_t cpu_type() const { return cpu_type_; } 1.387 + cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } 1.388 + FileType file_type() const { return file_type_; } 1.389 + FileFlags flags() const { return flags_; } 1.390 + 1.391 + // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit 1.392 + // Mach-O file. 1.393 + bool bits_64() const { return bits_64_; } 1.394 + 1.395 + // Return true if this is a big-endian Mach-O file, false if it is 1.396 + // little-endian. 1.397 + bool big_endian() const { return big_endian_; } 1.398 + 1.399 + // Apply |handler| to each load command in this Mach-O file, stopping when 1.400 + // a handler function returns false. If we encounter a malformed load 1.401 + // command, report it via reporter_ and return false. Return true if all 1.402 + // load commands were parseable and all handlers returned true. 1.403 + bool WalkLoadCommands(LoadCommandHandler *handler) const; 1.404 + 1.405 + // Set |segment| to describe the segment named |name|, if present. If 1.406 + // found, |segment|'s byte buffers refer to a subregion of the bytes 1.407 + // passed to Read. If we find the section, return true; otherwise, 1.408 + // return false. 1.409 + bool FindSegment(const string &name, Segment *segment) const; 1.410 + 1.411 + // Apply |handler| to each section defined in |segment|. If |handler| returns 1.412 + // false, stop iterating and return false. If all calls to |handler| return 1.413 + // true and we reach the end of the section list, return true. 1.414 + bool WalkSegmentSections(const Segment &segment, SectionHandler *handler) 1.415 + const; 1.416 + 1.417 + // Clear |section_map| and then populate it with a map of the sections 1.418 + // in |segment|, from section names to Section structures. 1.419 + // Each Section's contents refer to bytes in |segment|'s contents. 1.420 + // On success, return true; if a problem occurs, report it and return false. 1.421 + bool MapSegmentSections(const Segment &segment, SectionMap *section_map) 1.422 + const; 1.423 + 1.424 + private: 1.425 + // Used internally. 1.426 + class SegmentFinder; 1.427 + class SectionMapper; 1.428 + 1.429 + // We use this to report problems parsing the file's contents. (WEAK) 1.430 + Reporter *reporter_; 1.431 + 1.432 + // The contents of the Mach-O file we're parsing. We do not own the 1.433 + // storage it refers to. 1.434 + ByteBuffer buffer_; 1.435 + 1.436 + // True if this file is big-endian. 1.437 + bool big_endian_; 1.438 + 1.439 + // True if this file is a 64-bit Mach-O file. 1.440 + bool bits_64_; 1.441 + 1.442 + // This file's cpu type and subtype. 1.443 + cpu_type_t cpu_type_; // mach_header[_64].cputype 1.444 + cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype 1.445 + 1.446 + // This file's type. 1.447 + FileType file_type_; // mach_header[_64].filetype 1.448 + 1.449 + // The region of buffer_ occupied by load commands. 1.450 + ByteBuffer load_commands_; 1.451 + 1.452 + // The number of load commands in load_commands_. 1.453 + uint32_t load_command_count_; // mach_header[_64].ncmds 1.454 + 1.455 + // This file's header flags. 1.456 + FileFlags flags_; 1.457 +}; 1.458 + 1.459 +} // namespace mach_o 1.460 +} // namespace google_breakpad 1.461 + 1.462 +#endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_