|
1 // Copyright (c) 2010, Google Inc. |
|
2 // All rights reserved. |
|
3 // |
|
4 // Redistribution and use in source and binary forms, with or without |
|
5 // modification, are permitted provided that the following conditions are |
|
6 // met: |
|
7 // |
|
8 // * Redistributions of source code must retain the above copyright |
|
9 // notice, this list of conditions and the following disclaimer. |
|
10 // * Redistributions in binary form must reproduce the above |
|
11 // copyright notice, this list of conditions and the following disclaimer |
|
12 // in the documentation and/or other materials provided with the |
|
13 // distribution. |
|
14 // * Neither the name of Google Inc. nor the names of its |
|
15 // contributors may be used to endorse or promote products derived from |
|
16 // this software without specific prior written permission. |
|
17 // |
|
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 |
|
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
|
31 |
|
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
|
34 |
|
35 #include "common/mac/macho_reader.h" |
|
36 |
|
37 #include <assert.h> |
|
38 #include <stdio.h> |
|
39 #include <stdlib.h> |
|
40 |
|
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
|
42 #if !defined(CPU_TYPE_ARM) |
|
43 #define CPU_TYPE_ARM 12 |
|
44 #endif |
|
45 |
|
46 namespace google_breakpad { |
|
47 namespace mach_o { |
|
48 |
|
// When NDEBUG is #defined, the 'assert' macro does not evaluate its
// argument, so an expression that performs necessary work cannot be
// placed directly inside an assert. Assigning the result to a variable
// and asserting on that variable is no better: the variable is then
// unused when NDEBUG is #defined, which triggers compiler warnings.
//
// ASSERT_ALWAYS_EVAL always evaluates its argument, and additionally
// asserts that the result is true if NDEBUG is not #defined. In NDEBUG
// builds the result is explicitly discarded with a cast to void, so the
// expansion is a void expression in both configurations and does not
// provoke unused-value warnings.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) (static_cast<void>(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
|
62 |
|
63 void FatReader::Reporter::BadHeader() { |
|
64 fprintf(stderr, "%s: file is neither a fat binary file" |
|
65 " nor a Mach-O object file\n", filename_.c_str()); |
|
66 } |
|
67 |
|
68 void FatReader::Reporter::TooShort() { |
|
69 fprintf(stderr, "%s: file too short for the data it claims to contain\n", |
|
70 filename_.c_str()); |
|
71 } |
|
72 |
|
73 void FatReader::Reporter::MisplacedObjectFile() { |
|
74 fprintf(stderr, "%s: file too short for the object files it claims" |
|
75 " to contain\n", filename_.c_str()); |
|
76 } |
|
77 |
|
78 bool FatReader::Read(const uint8_t *buffer, size_t size) { |
|
79 buffer_.start = buffer; |
|
80 buffer_.end = buffer + size; |
|
81 ByteCursor cursor(&buffer_); |
|
82 |
|
83 // Fat binaries always use big-endian, so read the magic number in |
|
84 // that endianness. To recognize Mach-O magic numbers, which can use |
|
85 // either endianness, check for both the proper and reversed forms |
|
86 // of the magic numbers. |
|
87 cursor.set_big_endian(true); |
|
88 if (cursor >> magic_) { |
|
89 if (magic_ == FAT_MAGIC) { |
|
90 // How many object files does this fat binary contain? |
|
91 uint32_t object_files_count; |
|
92 if (!(cursor >> object_files_count)) { // nfat_arch |
|
93 reporter_->TooShort(); |
|
94 return false; |
|
95 } |
|
96 |
|
97 // Read the list of object files. |
|
98 object_files_.resize(object_files_count); |
|
99 for (size_t i = 0; i < object_files_count; i++) { |
|
100 struct fat_arch *objfile = &object_files_[i]; |
|
101 |
|
102 // Read this object file entry, byte-swapping as appropriate. |
|
103 cursor >> objfile->cputype |
|
104 >> objfile->cpusubtype |
|
105 >> objfile->offset |
|
106 >> objfile->size |
|
107 >> objfile->align; |
|
108 if (!cursor) { |
|
109 reporter_->TooShort(); |
|
110 return false; |
|
111 } |
|
112 // Does the file actually have the bytes this entry refers to? |
|
113 size_t fat_size = buffer_.Size(); |
|
114 if (objfile->offset > fat_size || |
|
115 objfile->size > fat_size - objfile->offset) { |
|
116 reporter_->MisplacedObjectFile(); |
|
117 return false; |
|
118 } |
|
119 } |
|
120 |
|
121 return true; |
|
122 } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || |
|
123 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { |
|
124 // If this is a little-endian Mach-O file, fix the cursor's endianness. |
|
125 if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) |
|
126 cursor.set_big_endian(false); |
|
127 // Record the entire file as a single entry in the object file list. |
|
128 object_files_.resize(1); |
|
129 |
|
130 // Get the cpu type and subtype from the Mach-O header. |
|
131 if (!(cursor >> object_files_[0].cputype |
|
132 >> object_files_[0].cpusubtype)) { |
|
133 reporter_->TooShort(); |
|
134 return false; |
|
135 } |
|
136 |
|
137 object_files_[0].offset = 0; |
|
138 object_files_[0].size = static_cast<uint32_t>(buffer_.Size()); |
|
139 // This alignment is correct for 32 and 64-bit x86 and ppc. |
|
140 // See get_align in the lipo source for other architectures: |
|
141 // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c |
|
142 object_files_[0].align = 12; // 2^12 == 4096 |
|
143 |
|
144 return true; |
|
145 } |
|
146 } |
|
147 |
|
148 reporter_->BadHeader(); |
|
149 return false; |
|
150 } |
|
151 |
|
152 void Reader::Reporter::BadHeader() { |
|
153 fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); |
|
154 } |
|
155 |
|
156 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, |
|
157 cpu_subtype_t cpu_subtype, |
|
158 cpu_type_t expected_cpu_type, |
|
159 cpu_subtype_t expected_cpu_subtype) { |
|
160 fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" |
|
161 " type %d, subtype %d\n", |
|
162 filename_.c_str(), cpu_type, cpu_subtype, |
|
163 expected_cpu_type, expected_cpu_subtype); |
|
164 } |
|
165 |
|
166 void Reader::Reporter::HeaderTruncated() { |
|
167 fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", |
|
168 filename_.c_str()); |
|
169 } |
|
170 |
|
171 void Reader::Reporter::LoadCommandRegionTruncated() { |
|
172 fprintf(stderr, "%s: file too short to hold load command region" |
|
173 " given in Mach-O header\n", filename_.c_str()); |
|
174 } |
|
175 |
|
176 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, |
|
177 LoadCommandType type) { |
|
178 fprintf(stderr, "%s: file's header claims there are %ld" |
|
179 " load commands, but load command #%ld", |
|
180 filename_.c_str(), claimed, i); |
|
181 if (type) fprintf(stderr, ", of type %d,", type); |
|
182 fprintf(stderr, " extends beyond the end of the load command region\n"); |
|
183 } |
|
184 |
|
185 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { |
|
186 fprintf(stderr, "%s: the contents of load command #%ld, of type %d," |
|
187 " extend beyond the size given in the load command's header\n", |
|
188 filename_.c_str(), i, type); |
|
189 } |
|
190 |
|
191 void Reader::Reporter::SectionsMissing(const string &name) { |
|
192 fprintf(stderr, "%s: the load command for segment '%s'" |
|
193 " is too short to hold the section headers it claims to have\n", |
|
194 filename_.c_str(), name.c_str()); |
|
195 } |
|
196 |
|
197 void Reader::Reporter::MisplacedSegmentData(const string &name) { |
|
198 fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" |
|
199 " the end of the file\n", filename_.c_str(), name.c_str()); |
|
200 } |
|
201 |
|
202 void Reader::Reporter::MisplacedSectionData(const string §ion, |
|
203 const string &segment) { |
|
204 fprintf(stderr, "%s: the section '%s' in segment '%s'" |
|
205 " claims its contents lie outside the segment's contents\n", |
|
206 filename_.c_str(), section.c_str(), segment.c_str()); |
|
207 } |
|
208 |
|
209 void Reader::Reporter::MisplacedSymbolTable() { |
|
210 fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" |
|
211 " table's contents are located beyond the end of the file\n", |
|
212 filename_.c_str()); |
|
213 } |
|
214 |
|
215 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { |
|
216 fprintf(stderr, "%s: CPU type %d is not supported\n", |
|
217 filename_.c_str(), cpu_type); |
|
218 } |
|
219 |
|
220 bool Reader::Read(const uint8_t *buffer, |
|
221 size_t size, |
|
222 cpu_type_t expected_cpu_type, |
|
223 cpu_subtype_t expected_cpu_subtype) { |
|
224 assert(!buffer_.start); |
|
225 buffer_.start = buffer; |
|
226 buffer_.end = buffer + size; |
|
227 ByteCursor cursor(&buffer_, true); |
|
228 uint32_t magic; |
|
229 if (!(cursor >> magic)) { |
|
230 reporter_->HeaderTruncated(); |
|
231 return false; |
|
232 } |
|
233 |
|
234 if (expected_cpu_type != CPU_TYPE_ANY) { |
|
235 uint32_t expected_magic; |
|
236 // validate that magic matches the expected cpu type |
|
237 switch (expected_cpu_type) { |
|
238 case CPU_TYPE_ARM: |
|
239 case CPU_TYPE_I386: |
|
240 expected_magic = MH_CIGAM; |
|
241 break; |
|
242 case CPU_TYPE_POWERPC: |
|
243 expected_magic = MH_MAGIC; |
|
244 break; |
|
245 case CPU_TYPE_X86_64: |
|
246 expected_magic = MH_CIGAM_64; |
|
247 break; |
|
248 case CPU_TYPE_POWERPC64: |
|
249 expected_magic = MH_MAGIC_64; |
|
250 break; |
|
251 default: |
|
252 reporter_->UnsupportedCPUType(expected_cpu_type); |
|
253 return false; |
|
254 } |
|
255 |
|
256 if (expected_magic != magic) { |
|
257 reporter_->BadHeader(); |
|
258 return false; |
|
259 } |
|
260 } |
|
261 |
|
262 // Since the byte cursor is in big-endian mode, a reversed magic number |
|
263 // always indicates a little-endian file, regardless of our own endianness. |
|
264 switch (magic) { |
|
265 case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; |
|
266 case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; |
|
267 case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; |
|
268 case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; |
|
269 default: |
|
270 reporter_->BadHeader(); |
|
271 return false; |
|
272 } |
|
273 cursor.set_big_endian(big_endian_); |
|
274 uint32_t commands_size, reserved; |
|
275 cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ |
|
276 >> commands_size >> flags_; |
|
277 if (bits_64_) |
|
278 cursor >> reserved; |
|
279 if (!cursor) { |
|
280 reporter_->HeaderTruncated(); |
|
281 return false; |
|
282 } |
|
283 |
|
284 if (expected_cpu_type != CPU_TYPE_ANY && |
|
285 (expected_cpu_type != cpu_type_ || |
|
286 expected_cpu_subtype != cpu_subtype_)) { |
|
287 reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, |
|
288 expected_cpu_type, expected_cpu_subtype); |
|
289 return false; |
|
290 } |
|
291 |
|
292 cursor |
|
293 .PointTo(&load_commands_.start, commands_size) |
|
294 .PointTo(&load_commands_.end, 0); |
|
295 if (!cursor) { |
|
296 reporter_->LoadCommandRegionTruncated(); |
|
297 return false; |
|
298 } |
|
299 |
|
300 return true; |
|
301 } |
|
302 |
|
303 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { |
|
304 ByteCursor list_cursor(&load_commands_, big_endian_); |
|
305 |
|
306 for (size_t index = 0; index < load_command_count_; ++index) { |
|
307 // command refers to this load command alone, so that cursor will |
|
308 // refuse to read past the load command's end. But since we haven't |
|
309 // read the size yet, let command initially refer to the entire |
|
310 // remainder of the load command series. |
|
311 ByteBuffer command(list_cursor.here(), list_cursor.Available()); |
|
312 ByteCursor cursor(&command, big_endian_); |
|
313 |
|
314 // Read the command type and size --- fields common to all commands. |
|
315 uint32_t type, size; |
|
316 if (!(cursor >> type)) { |
|
317 reporter_->LoadCommandsOverrun(load_command_count_, index, 0); |
|
318 return false; |
|
319 } |
|
320 if (!(cursor >> size) || size > command.Size()) { |
|
321 reporter_->LoadCommandsOverrun(load_command_count_, index, type); |
|
322 return false; |
|
323 } |
|
324 |
|
325 // Now that we've read the length, restrict command's range to this |
|
326 // load command only. |
|
327 command.end = command.start + size; |
|
328 |
|
329 switch (type) { |
|
330 case LC_SEGMENT: |
|
331 case LC_SEGMENT_64: { |
|
332 Segment segment; |
|
333 segment.bits_64 = (type == LC_SEGMENT_64); |
|
334 size_t word_size = segment.bits_64 ? 8 : 4; |
|
335 cursor.CString(&segment.name, 16); |
|
336 size_t file_offset, file_size; |
|
337 cursor |
|
338 .Read(word_size, false, &segment.vmaddr) |
|
339 .Read(word_size, false, &segment.vmsize) |
|
340 .Read(word_size, false, &file_offset) |
|
341 .Read(word_size, false, &file_size); |
|
342 cursor >> segment.maxprot |
|
343 >> segment.initprot |
|
344 >> segment.nsects |
|
345 >> segment.flags; |
|
346 if (!cursor) { |
|
347 reporter_->LoadCommandTooShort(index, type); |
|
348 return false; |
|
349 } |
|
350 if (file_offset > buffer_.Size() || |
|
351 file_size > buffer_.Size() - file_offset) { |
|
352 reporter_->MisplacedSegmentData(segment.name); |
|
353 return false; |
|
354 } |
|
355 // Mach-O files in .dSYM bundles have the contents of the loaded |
|
356 // segments removed, and their file offsets and file sizes zeroed |
|
357 // out. To help us handle this special case properly, give such |
|
358 // segments' contents NULL starting and ending pointers. |
|
359 if (file_offset == 0 && file_size == 0) { |
|
360 segment.contents.start = segment.contents.end = NULL; |
|
361 } else { |
|
362 segment.contents.start = buffer_.start + file_offset; |
|
363 segment.contents.end = segment.contents.start + file_size; |
|
364 } |
|
365 // The section list occupies the remainder of this load command's space. |
|
366 segment.section_list.start = cursor.here(); |
|
367 segment.section_list.end = command.end; |
|
368 |
|
369 if (!handler->SegmentCommand(segment)) |
|
370 return false; |
|
371 break; |
|
372 } |
|
373 |
|
374 case LC_SYMTAB: { |
|
375 uint32_t symoff, nsyms, stroff, strsize; |
|
376 cursor >> symoff >> nsyms >> stroff >> strsize; |
|
377 if (!cursor) { |
|
378 reporter_->LoadCommandTooShort(index, type); |
|
379 return false; |
|
380 } |
|
381 // How big are the entries in the symbol table? |
|
382 // sizeof(struct nlist_64) : sizeof(struct nlist), |
|
383 // but be paranoid about alignment vs. target architecture. |
|
384 size_t symbol_size = bits_64_ ? 16 : 12; |
|
385 // How big is the entire symbol array? |
|
386 size_t symbols_size = nsyms * symbol_size; |
|
387 if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || |
|
388 stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { |
|
389 reporter_->MisplacedSymbolTable(); |
|
390 return false; |
|
391 } |
|
392 ByteBuffer entries(buffer_.start + symoff, symbols_size); |
|
393 ByteBuffer names(buffer_.start + stroff, strsize); |
|
394 if (!handler->SymtabCommand(entries, names)) |
|
395 return false; |
|
396 break; |
|
397 } |
|
398 |
|
399 default: { |
|
400 if (!handler->UnknownCommand(type, command)) |
|
401 return false; |
|
402 break; |
|
403 } |
|
404 } |
|
405 |
|
406 list_cursor.set_here(command.end); |
|
407 } |
|
408 |
|
409 return true; |
|
410 } |
|
411 |
|
412 // A load command handler that looks for a segment of a given name. |
|
413 class Reader::SegmentFinder : public LoadCommandHandler { |
|
414 public: |
|
415 // Create a load command handler that looks for a segment named NAME, |
|
416 // and sets SEGMENT to describe it if found. |
|
417 SegmentFinder(const string &name, Segment *segment) |
|
418 : name_(name), segment_(segment), found_() { } |
|
419 |
|
420 // Return true if the traversal found the segment, false otherwise. |
|
421 bool found() const { return found_; } |
|
422 |
|
423 bool SegmentCommand(const Segment &segment) { |
|
424 if (segment.name == name_) { |
|
425 *segment_ = segment; |
|
426 found_ = true; |
|
427 return false; |
|
428 } |
|
429 return true; |
|
430 } |
|
431 |
|
432 private: |
|
433 // The name of the segment our creator is looking for. |
|
434 const string &name_; |
|
435 |
|
436 // Where we should store the segment if found. (WEAK) |
|
437 Segment *segment_; |
|
438 |
|
439 // True if we found the segment. |
|
440 bool found_; |
|
441 }; |
|
442 |
|
443 bool Reader::FindSegment(const string &name, Segment *segment) const { |
|
444 SegmentFinder finder(name, segment); |
|
445 WalkLoadCommands(&finder); |
|
446 return finder.found(); |
|
447 } |
|
448 |
|
449 bool Reader::WalkSegmentSections(const Segment &segment, |
|
450 SectionHandler *handler) const { |
|
451 size_t word_size = segment.bits_64 ? 8 : 4; |
|
452 ByteCursor cursor(&segment.section_list, big_endian_); |
|
453 |
|
454 for (size_t i = 0; i < segment.nsects; i++) { |
|
455 Section section; |
|
456 section.bits_64 = segment.bits_64; |
|
457 uint64_t size; |
|
458 uint32_t offset, dummy32; |
|
459 cursor |
|
460 .CString(§ion.section_name, 16) |
|
461 .CString(§ion.segment_name, 16) |
|
462 .Read(word_size, false, §ion.address) |
|
463 .Read(word_size, false, &size) |
|
464 >> offset |
|
465 >> section.align |
|
466 >> dummy32 |
|
467 >> dummy32 |
|
468 >> section.flags |
|
469 >> dummy32 |
|
470 >> dummy32; |
|
471 if (section.bits_64) |
|
472 cursor >> dummy32; |
|
473 if (!cursor) { |
|
474 reporter_->SectionsMissing(segment.name); |
|
475 return false; |
|
476 } |
|
477 if ((section.flags & SECTION_TYPE) == S_ZEROFILL) { |
|
478 // Zero-fill sections have a size, but no contents. |
|
479 section.contents.start = section.contents.end = NULL; |
|
480 } else if (segment.contents.start == NULL && |
|
481 segment.contents.end == NULL) { |
|
482 // Mach-O files in .dSYM bundles have the contents of the loaded |
|
483 // segments removed, and their file offsets and file sizes zeroed |
|
484 // out. However, the sections within those segments still have |
|
485 // non-zero sizes. There's no reason to call MisplacedSectionData in |
|
486 // this case; the caller may just need the section's load |
|
487 // address. But do set the contents' limits to NULL, for safety. |
|
488 section.contents.start = section.contents.end = NULL; |
|
489 } else { |
|
490 if (offset < size_t(segment.contents.start - buffer_.start) || |
|
491 offset > size_t(segment.contents.end - buffer_.start) || |
|
492 size > size_t(segment.contents.end - buffer_.start - offset)) { |
|
493 reporter_->MisplacedSectionData(section.section_name, |
|
494 section.segment_name); |
|
495 return false; |
|
496 } |
|
497 section.contents.start = buffer_.start + offset; |
|
498 section.contents.end = section.contents.start + size; |
|
499 } |
|
500 if (!handler->HandleSection(section)) |
|
501 return false; |
|
502 } |
|
503 return true; |
|
504 } |
|
505 |
|
506 // A SectionHandler that builds a SectionMap for the sections within a |
|
507 // given segment. |
|
508 class Reader::SectionMapper: public SectionHandler { |
|
509 public: |
|
510 // Create a SectionHandler that populates MAP with an entry for |
|
511 // each section it is given. |
|
512 SectionMapper(SectionMap *map) : map_(map) { } |
|
513 bool HandleSection(const Section §ion) { |
|
514 (*map_)[section.section_name] = section; |
|
515 return true; |
|
516 } |
|
517 private: |
|
518 // The map under construction. (WEAK) |
|
519 SectionMap *map_; |
|
520 }; |
|
521 |
|
522 bool Reader::MapSegmentSections(const Segment &segment, |
|
523 SectionMap *section_map) const { |
|
524 section_map->clear(); |
|
525 SectionMapper mapper(section_map); |
|
526 return WalkSegmentSections(segment, &mapper); |
|
527 } |
|
528 |
|
529 } // namespace mach_o |
|
530 } // namespace google_breakpad |