1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/mac/macho_reader.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,530 @@ 1.4 +// Copyright (c) 2010, Google Inc. 1.5 +// All rights reserved. 1.6 +// 1.7 +// Redistribution and use in source and binary forms, with or without 1.8 +// modification, are permitted provided that the following conditions are 1.9 +// met: 1.10 +// 1.11 +// * Redistributions of source code must retain the above copyright 1.12 +// notice, this list of conditions and the following disclaimer. 1.13 +// * Redistributions in binary form must reproduce the above 1.14 +// copyright notice, this list of conditions and the following disclaimer 1.15 +// in the documentation and/or other materials provided with the 1.16 +// distribution. 1.17 +// * Neither the name of Google Inc. nor the names of its 1.18 +// contributors may be used to endorse or promote products derived from 1.19 +// this software without specific prior written permission. 1.20 +// 1.21 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.22 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.23 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.24 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.25 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.26 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.27 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.28 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.29 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.30 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.31 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
1.32 + 1.33 +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> 1.34 + 1.35 +// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and 1.36 +// google_breakpad::mach_o::Reader. See macho_reader.h for details. 1.37 + 1.38 +#include "common/mac/macho_reader.h" 1.39 + 1.40 +#include <assert.h> 1.41 +#include <stdio.h> 1.42 +#include <stdlib.h> 1.43 + 1.44 +// Unfortunately, CPU_TYPE_ARM is not defined for 10.4. 1.45 +#if !defined(CPU_TYPE_ARM) 1.46 +#define CPU_TYPE_ARM 12 1.47 +#endif 1.48 + 1.49 +namespace google_breakpad { 1.50 +namespace mach_o { 1.51 + 1.52 +// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its 1.53 +// arguments, so you can't place expressions that do necessary work in 1.54 +// the argument of an assert. Nor can you assign the result of the 1.55 +// expression to a variable and assert that the variable's value is 1.56 +// true: you'll get unused variable warnings when NDEBUG is #defined. 1.57 +// 1.58 +// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that 1.59 +// the result is true if NDEBUG is not #defined. 
1.60 +#if defined(NDEBUG) 1.61 +#define ASSERT_ALWAYS_EVAL(x) (x) 1.62 +#else 1.63 +#define ASSERT_ALWAYS_EVAL(x) assert(x) 1.64 +#endif 1.65 + 1.66 +void FatReader::Reporter::BadHeader() { 1.67 + fprintf(stderr, "%s: file is neither a fat binary file" 1.68 + " nor a Mach-O object file\n", filename_.c_str()); 1.69 +} 1.70 + 1.71 +void FatReader::Reporter::TooShort() { 1.72 + fprintf(stderr, "%s: file too short for the data it claims to contain\n", 1.73 + filename_.c_str()); 1.74 +} 1.75 + 1.76 +void FatReader::Reporter::MisplacedObjectFile() { 1.77 + fprintf(stderr, "%s: file too short for the object files it claims" 1.78 + " to contain\n", filename_.c_str()); 1.79 +} 1.80 + 1.81 +bool FatReader::Read(const uint8_t *buffer, size_t size) { 1.82 + buffer_.start = buffer; 1.83 + buffer_.end = buffer + size; 1.84 + ByteCursor cursor(&buffer_); 1.85 + 1.86 + // Fat binaries always use big-endian, so read the magic number in 1.87 + // that endianness. To recognize Mach-O magic numbers, which can use 1.88 + // either endianness, check for both the proper and reversed forms 1.89 + // of the magic numbers. 1.90 + cursor.set_big_endian(true); 1.91 + if (cursor >> magic_) { 1.92 + if (magic_ == FAT_MAGIC) { 1.93 + // How many object files does this fat binary contain? 1.94 + uint32_t object_files_count; 1.95 + if (!(cursor >> object_files_count)) { // nfat_arch 1.96 + reporter_->TooShort(); 1.97 + return false; 1.98 + } 1.99 + 1.100 + // Read the list of object files. 1.101 + object_files_.resize(object_files_count); 1.102 + for (size_t i = 0; i < object_files_count; i++) { 1.103 + struct fat_arch *objfile = &object_files_[i]; 1.104 + 1.105 + // Read this object file entry, byte-swapping as appropriate. 
1.106 + cursor >> objfile->cputype 1.107 + >> objfile->cpusubtype 1.108 + >> objfile->offset 1.109 + >> objfile->size 1.110 + >> objfile->align; 1.111 + if (!cursor) { 1.112 + reporter_->TooShort(); 1.113 + return false; 1.114 + } 1.115 + // Does the file actually have the bytes this entry refers to? 1.116 + size_t fat_size = buffer_.Size(); 1.117 + if (objfile->offset > fat_size || 1.118 + objfile->size > fat_size - objfile->offset) { 1.119 + reporter_->MisplacedObjectFile(); 1.120 + return false; 1.121 + } 1.122 + } 1.123 + 1.124 + return true; 1.125 + } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || 1.126 + magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { 1.127 + // If this is a little-endian Mach-O file, fix the cursor's endianness. 1.128 + if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) 1.129 + cursor.set_big_endian(false); 1.130 + // Record the entire file as a single entry in the object file list. 1.131 + object_files_.resize(1); 1.132 + 1.133 + // Get the cpu type and subtype from the Mach-O header. 1.134 + if (!(cursor >> object_files_[0].cputype 1.135 + >> object_files_[0].cpusubtype)) { 1.136 + reporter_->TooShort(); 1.137 + return false; 1.138 + } 1.139 + 1.140 + object_files_[0].offset = 0; 1.141 + object_files_[0].size = static_cast<uint32_t>(buffer_.Size()); 1.142 + // This alignment is correct for 32 and 64-bit x86 and ppc. 
1.143 + // See get_align in the lipo source for other architectures: 1.144 + // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c 1.145 + object_files_[0].align = 12; // 2^12 == 4096 1.146 + 1.147 + return true; 1.148 + } 1.149 + } 1.150 + 1.151 + reporter_->BadHeader(); 1.152 + return false; 1.153 +} 1.154 + 1.155 +void Reader::Reporter::BadHeader() { 1.156 + fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); 1.157 +} 1.158 + 1.159 +void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, 1.160 + cpu_subtype_t cpu_subtype, 1.161 + cpu_type_t expected_cpu_type, 1.162 + cpu_subtype_t expected_cpu_subtype) { 1.163 + fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" 1.164 + " type %d, subtype %d\n", 1.165 + filename_.c_str(), cpu_type, cpu_subtype, 1.166 + expected_cpu_type, expected_cpu_subtype); 1.167 +} 1.168 + 1.169 +void Reader::Reporter::HeaderTruncated() { 1.170 + fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", 1.171 + filename_.c_str()); 1.172 +} 1.173 + 1.174 +void Reader::Reporter::LoadCommandRegionTruncated() { 1.175 + fprintf(stderr, "%s: file too short to hold load command region" 1.176 + " given in Mach-O header\n", filename_.c_str()); 1.177 +} 1.178 + 1.179 +void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, 1.180 + LoadCommandType type) { 1.181 + fprintf(stderr, "%s: file's header claims there are %ld" 1.182 + " load commands, but load command #%ld", 1.183 + filename_.c_str(), claimed, i); 1.184 + if (type) fprintf(stderr, ", of type %d,", type); 1.185 + fprintf(stderr, " extends beyond the end of the load command region\n"); 1.186 +} 1.187 + 1.188 +void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { 1.189 + fprintf(stderr, "%s: the contents of load command #%ld, of type %d," 1.190 + " extend beyond the size given in the load command's header\n", 1.191 + filename_.c_str(), i, type); 1.192 +} 1.193 + 1.194 +void 
Reader::Reporter::SectionsMissing(const string &name) { 1.195 + fprintf(stderr, "%s: the load command for segment '%s'" 1.196 + " is too short to hold the section headers it claims to have\n", 1.197 + filename_.c_str(), name.c_str()); 1.198 +} 1.199 + 1.200 +void Reader::Reporter::MisplacedSegmentData(const string &name) { 1.201 + fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" 1.202 + " the end of the file\n", filename_.c_str(), name.c_str()); 1.203 +} 1.204 + 1.205 +void Reader::Reporter::MisplacedSectionData(const string §ion, 1.206 + const string &segment) { 1.207 + fprintf(stderr, "%s: the section '%s' in segment '%s'" 1.208 + " claims its contents lie outside the segment's contents\n", 1.209 + filename_.c_str(), section.c_str(), segment.c_str()); 1.210 +} 1.211 + 1.212 +void Reader::Reporter::MisplacedSymbolTable() { 1.213 + fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" 1.214 + " table's contents are located beyond the end of the file\n", 1.215 + filename_.c_str()); 1.216 +} 1.217 + 1.218 +void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { 1.219 + fprintf(stderr, "%s: CPU type %d is not supported\n", 1.220 + filename_.c_str(), cpu_type); 1.221 +} 1.222 + 1.223 +bool Reader::Read(const uint8_t *buffer, 1.224 + size_t size, 1.225 + cpu_type_t expected_cpu_type, 1.226 + cpu_subtype_t expected_cpu_subtype) { 1.227 + assert(!buffer_.start); 1.228 + buffer_.start = buffer; 1.229 + buffer_.end = buffer + size; 1.230 + ByteCursor cursor(&buffer_, true); 1.231 + uint32_t magic; 1.232 + if (!(cursor >> magic)) { 1.233 + reporter_->HeaderTruncated(); 1.234 + return false; 1.235 + } 1.236 + 1.237 + if (expected_cpu_type != CPU_TYPE_ANY) { 1.238 + uint32_t expected_magic; 1.239 + // validate that magic matches the expected cpu type 1.240 + switch (expected_cpu_type) { 1.241 + case CPU_TYPE_ARM: 1.242 + case CPU_TYPE_I386: 1.243 + expected_magic = MH_CIGAM; 1.244 + break; 1.245 + case CPU_TYPE_POWERPC: 1.246 + 
expected_magic = MH_MAGIC; 1.247 + break; 1.248 + case CPU_TYPE_X86_64: 1.249 + expected_magic = MH_CIGAM_64; 1.250 + break; 1.251 + case CPU_TYPE_POWERPC64: 1.252 + expected_magic = MH_MAGIC_64; 1.253 + break; 1.254 + default: 1.255 + reporter_->UnsupportedCPUType(expected_cpu_type); 1.256 + return false; 1.257 + } 1.258 + 1.259 + if (expected_magic != magic) { 1.260 + reporter_->BadHeader(); 1.261 + return false; 1.262 + } 1.263 + } 1.264 + 1.265 + // Since the byte cursor is in big-endian mode, a reversed magic number 1.266 + // always indicates a little-endian file, regardless of our own endianness. 1.267 + switch (magic) { 1.268 + case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; 1.269 + case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; 1.270 + case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; 1.271 + case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; 1.272 + default: 1.273 + reporter_->BadHeader(); 1.274 + return false; 1.275 + } 1.276 + cursor.set_big_endian(big_endian_); 1.277 + uint32_t commands_size, reserved; 1.278 + cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ 1.279 + >> commands_size >> flags_; 1.280 + if (bits_64_) 1.281 + cursor >> reserved; 1.282 + if (!cursor) { 1.283 + reporter_->HeaderTruncated(); 1.284 + return false; 1.285 + } 1.286 + 1.287 + if (expected_cpu_type != CPU_TYPE_ANY && 1.288 + (expected_cpu_type != cpu_type_ || 1.289 + expected_cpu_subtype != cpu_subtype_)) { 1.290 + reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, 1.291 + expected_cpu_type, expected_cpu_subtype); 1.292 + return false; 1.293 + } 1.294 + 1.295 + cursor 1.296 + .PointTo(&load_commands_.start, commands_size) 1.297 + .PointTo(&load_commands_.end, 0); 1.298 + if (!cursor) { 1.299 + reporter_->LoadCommandRegionTruncated(); 1.300 + return false; 1.301 + } 1.302 + 1.303 + return true; 1.304 +} 1.305 + 1.306 +bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { 1.307 + 
ByteCursor list_cursor(&load_commands_, big_endian_); 1.308 + 1.309 + for (size_t index = 0; index < load_command_count_; ++index) { 1.310 + // command refers to this load command alone, so that cursor will 1.311 + // refuse to read past the load command's end. But since we haven't 1.312 + // read the size yet, let command initially refer to the entire 1.313 + // remainder of the load command series. 1.314 + ByteBuffer command(list_cursor.here(), list_cursor.Available()); 1.315 + ByteCursor cursor(&command, big_endian_); 1.316 + 1.317 + // Read the command type and size --- fields common to all commands. 1.318 + uint32_t type, size; 1.319 + if (!(cursor >> type)) { 1.320 + reporter_->LoadCommandsOverrun(load_command_count_, index, 0); 1.321 + return false; 1.322 + } 1.323 + if (!(cursor >> size) || size > command.Size()) { 1.324 + reporter_->LoadCommandsOverrun(load_command_count_, index, type); 1.325 + return false; 1.326 + } 1.327 + 1.328 + // Now that we've read the length, restrict command's range to this 1.329 + // load command only. 1.330 + command.end = command.start + size; 1.331 + 1.332 + switch (type) { 1.333 + case LC_SEGMENT: 1.334 + case LC_SEGMENT_64: { 1.335 + Segment segment; 1.336 + segment.bits_64 = (type == LC_SEGMENT_64); 1.337 + size_t word_size = segment.bits_64 ? 
8 : 4; 1.338 + cursor.CString(&segment.name, 16); 1.339 + size_t file_offset, file_size; 1.340 + cursor 1.341 + .Read(word_size, false, &segment.vmaddr) 1.342 + .Read(word_size, false, &segment.vmsize) 1.343 + .Read(word_size, false, &file_offset) 1.344 + .Read(word_size, false, &file_size); 1.345 + cursor >> segment.maxprot 1.346 + >> segment.initprot 1.347 + >> segment.nsects 1.348 + >> segment.flags; 1.349 + if (!cursor) { 1.350 + reporter_->LoadCommandTooShort(index, type); 1.351 + return false; 1.352 + } 1.353 + if (file_offset > buffer_.Size() || 1.354 + file_size > buffer_.Size() - file_offset) { 1.355 + reporter_->MisplacedSegmentData(segment.name); 1.356 + return false; 1.357 + } 1.358 + // Mach-O files in .dSYM bundles have the contents of the loaded 1.359 + // segments removed, and their file offsets and file sizes zeroed 1.360 + // out. To help us handle this special case properly, give such 1.361 + // segments' contents NULL starting and ending pointers. 1.362 + if (file_offset == 0 && file_size == 0) { 1.363 + segment.contents.start = segment.contents.end = NULL; 1.364 + } else { 1.365 + segment.contents.start = buffer_.start + file_offset; 1.366 + segment.contents.end = segment.contents.start + file_size; 1.367 + } 1.368 + // The section list occupies the remainder of this load command's space. 1.369 + segment.section_list.start = cursor.here(); 1.370 + segment.section_list.end = command.end; 1.371 + 1.372 + if (!handler->SegmentCommand(segment)) 1.373 + return false; 1.374 + break; 1.375 + } 1.376 + 1.377 + case LC_SYMTAB: { 1.378 + uint32_t symoff, nsyms, stroff, strsize; 1.379 + cursor >> symoff >> nsyms >> stroff >> strsize; 1.380 + if (!cursor) { 1.381 + reporter_->LoadCommandTooShort(index, type); 1.382 + return false; 1.383 + } 1.384 + // How big are the entries in the symbol table? 1.385 + // sizeof(struct nlist_64) : sizeof(struct nlist), 1.386 + // but be paranoid about alignment vs. target architecture. 
1.387 + size_t symbol_size = bits_64_ ? 16 : 12; 1.388 + // How big is the entire symbol array? 1.389 + size_t symbols_size = nsyms * symbol_size; 1.390 + if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || 1.391 + stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { 1.392 + reporter_->MisplacedSymbolTable(); 1.393 + return false; 1.394 + } 1.395 + ByteBuffer entries(buffer_.start + symoff, symbols_size); 1.396 + ByteBuffer names(buffer_.start + stroff, strsize); 1.397 + if (!handler->SymtabCommand(entries, names)) 1.398 + return false; 1.399 + break; 1.400 + } 1.401 + 1.402 + default: { 1.403 + if (!handler->UnknownCommand(type, command)) 1.404 + return false; 1.405 + break; 1.406 + } 1.407 + } 1.408 + 1.409 + list_cursor.set_here(command.end); 1.410 + } 1.411 + 1.412 + return true; 1.413 +} 1.414 + 1.415 +// A load command handler that looks for a segment of a given name. 1.416 +class Reader::SegmentFinder : public LoadCommandHandler { 1.417 + public: 1.418 + // Create a load command handler that looks for a segment named NAME, 1.419 + // and sets SEGMENT to describe it if found. 1.420 + SegmentFinder(const string &name, Segment *segment) 1.421 + : name_(name), segment_(segment), found_() { } 1.422 + 1.423 + // Return true if the traversal found the segment, false otherwise. 1.424 + bool found() const { return found_; } 1.425 + 1.426 + bool SegmentCommand(const Segment &segment) { 1.427 + if (segment.name == name_) { 1.428 + *segment_ = segment; 1.429 + found_ = true; 1.430 + return false; 1.431 + } 1.432 + return true; 1.433 + } 1.434 + 1.435 + private: 1.436 + // The name of the segment our creator is looking for. 1.437 + const string &name_; 1.438 + 1.439 + // Where we should store the segment if found. (WEAK) 1.440 + Segment *segment_; 1.441 + 1.442 + // True if we found the segment. 
1.443 + bool found_; 1.444 +}; 1.445 + 1.446 +bool Reader::FindSegment(const string &name, Segment *segment) const { 1.447 + SegmentFinder finder(name, segment); 1.448 + WalkLoadCommands(&finder); 1.449 + return finder.found(); 1.450 +} 1.451 + 1.452 +bool Reader::WalkSegmentSections(const Segment &segment, 1.453 + SectionHandler *handler) const { 1.454 + size_t word_size = segment.bits_64 ? 8 : 4; 1.455 + ByteCursor cursor(&segment.section_list, big_endian_); 1.456 + 1.457 + for (size_t i = 0; i < segment.nsects; i++) { 1.458 + Section section; 1.459 + section.bits_64 = segment.bits_64; 1.460 + uint64_t size; 1.461 + uint32_t offset, dummy32; 1.462 + cursor 1.463 + .CString(§ion.section_name, 16) 1.464 + .CString(§ion.segment_name, 16) 1.465 + .Read(word_size, false, §ion.address) 1.466 + .Read(word_size, false, &size) 1.467 + >> offset 1.468 + >> section.align 1.469 + >> dummy32 1.470 + >> dummy32 1.471 + >> section.flags 1.472 + >> dummy32 1.473 + >> dummy32; 1.474 + if (section.bits_64) 1.475 + cursor >> dummy32; 1.476 + if (!cursor) { 1.477 + reporter_->SectionsMissing(segment.name); 1.478 + return false; 1.479 + } 1.480 + if ((section.flags & SECTION_TYPE) == S_ZEROFILL) { 1.481 + // Zero-fill sections have a size, but no contents. 1.482 + section.contents.start = section.contents.end = NULL; 1.483 + } else if (segment.contents.start == NULL && 1.484 + segment.contents.end == NULL) { 1.485 + // Mach-O files in .dSYM bundles have the contents of the loaded 1.486 + // segments removed, and their file offsets and file sizes zeroed 1.487 + // out. However, the sections within those segments still have 1.488 + // non-zero sizes. There's no reason to call MisplacedSectionData in 1.489 + // this case; the caller may just need the section's load 1.490 + // address. But do set the contents' limits to NULL, for safety. 
1.491 + section.contents.start = section.contents.end = NULL; 1.492 + } else { 1.493 + if (offset < size_t(segment.contents.start - buffer_.start) || 1.494 + offset > size_t(segment.contents.end - buffer_.start) || 1.495 + size > size_t(segment.contents.end - buffer_.start - offset)) { 1.496 + reporter_->MisplacedSectionData(section.section_name, 1.497 + section.segment_name); 1.498 + return false; 1.499 + } 1.500 + section.contents.start = buffer_.start + offset; 1.501 + section.contents.end = section.contents.start + size; 1.502 + } 1.503 + if (!handler->HandleSection(section)) 1.504 + return false; 1.505 + } 1.506 + return true; 1.507 +} 1.508 + 1.509 +// A SectionHandler that builds a SectionMap for the sections within a 1.510 +// given segment. 1.511 +class Reader::SectionMapper: public SectionHandler { 1.512 + public: 1.513 + // Create a SectionHandler that populates MAP with an entry for 1.514 + // each section it is given. 1.515 + SectionMapper(SectionMap *map) : map_(map) { } 1.516 + bool HandleSection(const Section §ion) { 1.517 + (*map_)[section.section_name] = section; 1.518 + return true; 1.519 + } 1.520 + private: 1.521 + // The map under construction. (WEAK) 1.522 + SectionMap *map_; 1.523 +}; 1.524 + 1.525 +bool Reader::MapSegmentSections(const Segment &segment, 1.526 + SectionMap *section_map) const { 1.527 + section_map->clear(); 1.528 + SectionMapper mapper(section_map); 1.529 + return WalkSegmentSections(segment, &mapper); 1.530 +} 1.531 + 1.532 +} // namespace mach_o 1.533 +} // namespace google_breakpad