|
1 // -*- mode: C++ -*- |
|
2 |
|
3 // Copyright (c) 2010, Google Inc. |
|
4 // All rights reserved. |
|
5 // |
|
6 // Redistribution and use in source and binary forms, with or without |
|
7 // modification, are permitted provided that the following conditions are |
|
8 // met: |
|
9 // |
|
10 // * Redistributions of source code must retain the above copyright |
|
11 // notice, this list of conditions and the following disclaimer. |
|
12 // * Redistributions in binary form must reproduce the above |
|
13 // copyright notice, this list of conditions and the following disclaimer |
|
14 // in the documentation and/or other materials provided with the |
|
15 // distribution. |
|
16 // * Neither the name of Google Inc. nor the names of its |
|
17 // contributors may be used to endorse or promote products derived from |
|
18 // this software without specific prior written permission. |
|
19 // |
|
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
31 |
|
32 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
|
33 |
|
34 // macho_reader.h: A class for parsing Mach-O files. |
|
35 |
|
36 #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ |
|
37 #define BREAKPAD_COMMON_MAC_MACHO_READER_H_ |
|
38 |
|
39 #include <mach-o/loader.h> |
|
40 #include <mach-o/fat.h> |
|
41 #include <stdint.h> |
|
42 #include <stdlib.h> |
|
43 #include <unistd.h> |
|
44 |
|
45 #include <map> |
|
46 #include <string> |
|
47 #include <vector> |
|
48 |
|
49 #include "common/byte_cursor.h" |
|
50 |
|
51 namespace google_breakpad { |
|
52 namespace mach_o { |
|
53 |
|
54 using std::map; |
|
55 using std::string; |
|
56 using std::vector; |
|
57 |
|
// The Mac headers do not give these groups of constants types of their
// own. Naming them here documents which group a value belongs to, and
// giving each one the same width as the header field it is stored in
// makes the values easier to use with ByteCursors.
typedef uint32_t Magic;            // Magic numbers.
typedef uint32_t FileType;         // File types.
typedef uint32_t FileFlags;        // File header flags.
typedef uint32_t LoadCommandType;  // Load command types.
typedef uint32_t SegmentFlags;     // Segment flags.
typedef uint32_t SectionFlags;     // Section flags.
|
68 |
|
69 // A parser for fat binary files, used to store universal binaries. |
|
70 // When applied to a (non-fat) Mach-O file, this behaves as if the |
|
71 // file were a fat file containing a single object file. |
|
72 class FatReader { |
|
73 public: |
|
74 |
|
75 // A class for reporting errors found while parsing fat binary files. The |
|
76 // default definitions of these methods print messages to stderr. |
|
77 class Reporter { |
|
78 public: |
|
79 // Create a reporter that attributes problems to |filename|. |
|
80 explicit Reporter(const string &filename) : filename_(filename) { } |
|
81 |
|
82 virtual ~Reporter() { } |
|
83 |
|
84 // The data does not begin with a fat binary or Mach-O magic number. |
|
85 // This is a fatal error. |
|
86 virtual void BadHeader(); |
|
87 |
|
88 // The Mach-O fat binary file ends abruptly, without enough space |
|
89 // to contain an object file it claims is present. |
|
90 virtual void MisplacedObjectFile(); |
|
91 |
|
92 // The file ends abruptly: either it is not large enough to hold a |
|
93 // complete header, or the header implies that contents are present |
|
94 // beyond the actual end of the file. |
|
95 virtual void TooShort(); |
|
96 |
|
97 private: |
|
98 // The filename to which the reader should attribute problems. |
|
99 string filename_; |
|
100 }; |
|
101 |
|
102 // Create a fat binary file reader that uses |reporter| to report problems. |
|
103 explicit FatReader(Reporter *reporter) : reporter_(reporter) { } |
|
104 |
|
105 // Read the |size| bytes at |buffer| as a fat binary file. On success, |
|
106 // return true; on failure, report the problem to reporter_ and return |
|
107 // false. |
|
108 // |
|
109 // If the data is a plain Mach-O file, rather than a fat binary file, |
|
110 // then the reader behaves as if it had found a fat binary file whose |
|
111 // single object file is the Mach-O file. |
|
112 bool Read(const uint8_t *buffer, size_t size); |
|
113 |
|
114 // Return an array of 'struct fat_arch' structures describing the |
|
115 // object files present in this fat binary file. Set |size| to the |
|
116 // number of elements in the array. |
|
117 // |
|
118 // Assuming Read returned true, the entries are validated: it is |
|
119 // safe to assume that the offsets and sizes in each 'struct |
|
120 // fat_arch' refer to subranges of the bytes passed to Read. |
|
121 // |
|
122 // If there are no object files in this fat binary, then this |
|
123 // function can return NULL. |
|
124 // |
|
125 // The array is owned by this FatReader instance; it will be freed when |
|
126 // this FatReader is destroyed. |
|
127 // |
|
128 // This function returns a C-style array instead of a vector to make it |
|
129 // possible to use the result with OS X functions like NXFindBestFatArch, |
|
130 // so that the symbol dumper will behave consistently with other OS X |
|
131 // utilities that work with fat binaries. |
|
132 const struct fat_arch *object_files(size_t *count) const { |
|
133 *count = object_files_.size(); |
|
134 if (object_files_.size() > 0) |
|
135 return &object_files_[0]; |
|
136 return NULL; |
|
137 } |
|
138 |
|
139 private: |
|
140 // We use this to report problems parsing the file's contents. (WEAK) |
|
141 Reporter *reporter_; |
|
142 |
|
143 // The contents of the fat binary or Mach-O file we're parsing. We do not |
|
144 // own the storage it refers to. |
|
145 ByteBuffer buffer_; |
|
146 |
|
147 // The magic number of this binary, in host byte order. |
|
148 Magic magic_; |
|
149 |
|
150 // The list of object files in this binary. |
|
151 // object_files_.size() == fat_header.nfat_arch |
|
152 vector<struct fat_arch> object_files_; |
|
153 }; |
|
154 |
|
155 // A segment in a Mach-O file. All these fields have been byte-swapped as |
|
156 // appropriate for use by the executing architecture. |
|
157 struct Segment { |
|
158 // The ByteBuffers below point into the bytes passed to the Reader that |
|
159 // created this Segment. |
|
160 |
|
161 ByteBuffer section_list; // This segment's section list. |
|
162 ByteBuffer contents; // This segment's contents. |
|
163 |
|
164 // This segment's name. |
|
165 string name; |
|
166 |
|
167 // The address at which this segment should be loaded in memory. If |
|
168 // bits_64 is false, only the bottom 32 bits of this value are valid. |
|
169 uint64_t vmaddr; |
|
170 |
|
171 // The size of this segment when loaded into memory. This may be larger |
|
172 // than contents.Size(), in which case the extra area will be |
|
173 // initialized with zeros. If bits_64 is false, only the bottom 32 bits |
|
174 // of this value are valid. |
|
175 uint64_t vmsize; |
|
176 |
|
177 // The maximum and initial VM protection of this segment's contents. |
|
178 uint32_t maxprot; |
|
179 uint32_t initprot; |
|
180 |
|
181 // The number of sections in section_list. |
|
182 uint32_t nsects; |
|
183 |
|
184 // Flags describing this segment, from SegmentFlags. |
|
185 uint32_t flags; |
|
186 |
|
187 // True if this is a 64-bit section; false if it is a 32-bit section. |
|
188 bool bits_64; |
|
189 }; |
|
190 |
|
191 // A section in a Mach-O file. All these fields have been byte-swapped as |
|
192 // appropriate for use by the executing architecture. |
|
193 struct Section { |
|
194 // This section's contents. This points into the bytes passed to the |
|
195 // Reader that created this Section. |
|
196 ByteBuffer contents; |
|
197 |
|
198 // This section's name. |
|
199 string section_name; // section[_64].sectname |
|
200 // The name of the segment this section belongs to. |
|
201 string segment_name; // section[_64].segname |
|
202 |
|
203 // The address at which this section's contents should be loaded in |
|
204 // memory. If bits_64 is false, only the bottom 32 bits of this value |
|
205 // are valid. |
|
206 uint64_t address; |
|
207 |
|
208 // The contents of this section should be loaded into memory at an |
|
209 // address which is a multiple of (two raised to this power). |
|
210 uint32_t align; |
|
211 |
|
212 // Flags from SectionFlags describing the section's contents. |
|
213 uint32_t flags; |
|
214 |
|
215 // We don't support reading relocations yet. |
|
216 |
|
217 // True if this is a 64-bit section; false if it is a 32-bit section. |
|
218 bool bits_64; |
|
219 }; |
|
220 |
|
221 // A map from section names to Sections. |
|
222 typedef map<string, Section> SectionMap; |
|
223 |
|
224 // A reader for a Mach-O file. |
|
225 // |
|
226 // This does not handle fat binaries; see FatReader above. FatReader |
|
227 // provides a friendly interface for parsing data that could be either a |
|
228 // fat binary or a Mach-O file. |
|
229 class Reader { |
|
230 public: |
|
231 |
|
232 // A class for reporting errors found while parsing Mach-O files. The |
|
233 // default definitions of these member functions print messages to |
|
234 // stderr. |
|
235 class Reporter { |
|
236 public: |
|
237 // Create a reporter that attributes problems to |filename|. |
|
238 explicit Reporter(const string &filename) : filename_(filename) { } |
|
239 virtual ~Reporter() { } |
|
240 |
|
241 // Reporter functions for fatal errors return void; the reader will |
|
242 // definitely return an error to its caller after calling them |
|
243 |
|
244 // The data does not begin with a Mach-O magic number, or the magic |
|
245 // number does not match the expected value for the cpu architecture. |
|
246 // This is a fatal error. |
|
247 virtual void BadHeader(); |
|
248 |
|
249 // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) |
|
250 // does not match the expected CPU architecture |
|
251 // (|expected_cpu_type|, |expected_cpu_subtype|). |
|
252 virtual void CPUTypeMismatch(cpu_type_t cpu_type, |
|
253 cpu_subtype_t cpu_subtype, |
|
254 cpu_type_t expected_cpu_type, |
|
255 cpu_subtype_t expected_cpu_subtype); |
|
256 |
|
257 // The file ends abruptly: either it is not large enough to hold a |
|
258 // complete header, or the header implies that contents are present |
|
259 // beyond the actual end of the file. |
|
260 virtual void HeaderTruncated(); |
|
261 |
|
262 // The file's load command region, as given in the Mach-O header, is |
|
263 // too large for the file. |
|
264 virtual void LoadCommandRegionTruncated(); |
|
265 |
|
266 // The file's Mach-O header claims the file contains |claimed| load |
|
267 // commands, but the I'th load command, of type |type|, extends beyond |
|
268 // the end of the load command region, as given by the Mach-O header. |
|
269 // If |type| is zero, the command's type was unreadable. |
|
270 virtual void LoadCommandsOverrun(size_t claimed, size_t i, |
|
271 LoadCommandType type); |
|
272 |
|
273 // The contents of the |i|'th load command, of type |type|, extend beyond |
|
274 // the size given in the load command's header. |
|
275 virtual void LoadCommandTooShort(size_t i, LoadCommandType type); |
|
276 |
|
277 // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named |
|
278 // |name| is too short to hold the sections that its header says it does. |
|
279 // (This more specific than LoadCommandTooShort.) |
|
280 virtual void SectionsMissing(const string &name); |
|
281 |
|
282 // The segment named |name| claims that its contents lie beyond the end |
|
283 // of the file. |
|
284 virtual void MisplacedSegmentData(const string &name); |
|
285 |
|
286 // The section named |section| in the segment named |segment| claims that |
|
287 // its contents do not lie entirely within the segment. |
|
288 virtual void MisplacedSectionData(const string §ion, |
|
289 const string &segment); |
|
290 |
|
291 // The LC_SYMTAB command claims that symbol table contents are located |
|
292 // beyond the end of the file. |
|
293 virtual void MisplacedSymbolTable(); |
|
294 |
|
295 // An attempt was made to read a Mach-O file of the unsupported |
|
296 // CPU architecture |cpu_type|. |
|
297 virtual void UnsupportedCPUType(cpu_type_t cpu_type); |
|
298 |
|
299 private: |
|
300 string filename_; |
|
301 }; |
|
302 |
|
303 // A handler for sections parsed from a segment. The WalkSegmentSections |
|
304 // member function accepts an instance of this class, and applies it to |
|
305 // each section defined in a given segment. |
|
306 class SectionHandler { |
|
307 public: |
|
308 virtual ~SectionHandler() { } |
|
309 |
|
310 // Called to report that the segment's section list contains |section|. |
|
311 // This should return true if the iteration should continue, or false |
|
312 // if it should stop. |
|
313 virtual bool HandleSection(const Section §ion) = 0; |
|
314 }; |
|
315 |
|
316 // A handler for the load commands in a Mach-O file. |
|
317 class LoadCommandHandler { |
|
318 public: |
|
319 LoadCommandHandler() { } |
|
320 virtual ~LoadCommandHandler() { } |
|
321 |
|
322 // When called from WalkLoadCommands, the following handler functions |
|
323 // should return true if they wish to continue iterating over the load |
|
324 // command list, or false if they wish to stop iterating. |
|
325 // |
|
326 // When called from LoadCommandIterator::Handle or Reader::Handle, |
|
327 // these functions' return values are simply passed through to Handle's |
|
328 // caller. |
|
329 // |
|
330 // The definitions provided by this base class simply return true; the |
|
331 // default is to silently ignore sections whose member functions the |
|
332 // subclass doesn't override. |
|
333 |
|
334 // COMMAND is load command we don't recognize. We provide only the |
|
335 // command type and a ByteBuffer enclosing the command's data (If we |
|
336 // cannot parse the command type or its size, we call |
|
337 // reporter_->IncompleteLoadCommand instead.) |
|
338 virtual bool UnknownCommand(LoadCommandType type, |
|
339 const ByteBuffer &contents) { |
|
340 return true; |
|
341 } |
|
342 |
|
343 // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment |
|
344 // with the properties given in |segment|. |
|
345 virtual bool SegmentCommand(const Segment &segment) { |
|
346 return true; |
|
347 } |
|
348 |
|
349 // The load command is LC_SYMTAB. |entries| holds the array of nlist |
|
350 // entries, and |names| holds the strings the entries refer to. |
|
351 virtual bool SymtabCommand(const ByteBuffer &entries, |
|
352 const ByteBuffer &names) { |
|
353 return true; |
|
354 } |
|
355 |
|
356 // Add handler functions for more load commands here as needed. |
|
357 }; |
|
358 |
|
359 // Create a Mach-O file reader that reports problems to |reporter|. |
|
360 explicit Reader(Reporter *reporter) |
|
361 : reporter_(reporter) { } |
|
362 |
|
363 // Read the given data as a Mach-O file. The reader retains pointers |
|
364 // into the data passed, so the data should live as long as the reader |
|
365 // does. On success, return true; on failure, return false. |
|
366 // |
|
367 // At most one of these functions should be invoked once on each Reader |
|
368 // instance. |
|
369 bool Read(const uint8_t *buffer, |
|
370 size_t size, |
|
371 cpu_type_t expected_cpu_type, |
|
372 cpu_subtype_t expected_cpu_subtype); |
|
373 bool Read(const ByteBuffer &buffer, |
|
374 cpu_type_t expected_cpu_type, |
|
375 cpu_subtype_t expected_cpu_subtype) { |
|
376 return Read(buffer.start, |
|
377 buffer.Size(), |
|
378 expected_cpu_type, |
|
379 expected_cpu_subtype); |
|
380 } |
|
381 |
|
382 // Return this file's characteristics, as found in the Mach-O header. |
|
383 cpu_type_t cpu_type() const { return cpu_type_; } |
|
384 cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } |
|
385 FileType file_type() const { return file_type_; } |
|
386 FileFlags flags() const { return flags_; } |
|
387 |
|
388 // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit |
|
389 // Mach-O file. |
|
390 bool bits_64() const { return bits_64_; } |
|
391 |
|
392 // Return true if this is a big-endian Mach-O file, false if it is |
|
393 // little-endian. |
|
394 bool big_endian() const { return big_endian_; } |
|
395 |
|
396 // Apply |handler| to each load command in this Mach-O file, stopping when |
|
397 // a handler function returns false. If we encounter a malformed load |
|
398 // command, report it via reporter_ and return false. Return true if all |
|
399 // load commands were parseable and all handlers returned true. |
|
400 bool WalkLoadCommands(LoadCommandHandler *handler) const; |
|
401 |
|
402 // Set |segment| to describe the segment named |name|, if present. If |
|
403 // found, |segment|'s byte buffers refer to a subregion of the bytes |
|
404 // passed to Read. If we find the section, return true; otherwise, |
|
405 // return false. |
|
406 bool FindSegment(const string &name, Segment *segment) const; |
|
407 |
|
408 // Apply |handler| to each section defined in |segment|. If |handler| returns |
|
409 // false, stop iterating and return false. If all calls to |handler| return |
|
410 // true and we reach the end of the section list, return true. |
|
411 bool WalkSegmentSections(const Segment &segment, SectionHandler *handler) |
|
412 const; |
|
413 |
|
414 // Clear |section_map| and then populate it with a map of the sections |
|
415 // in |segment|, from section names to Section structures. |
|
416 // Each Section's contents refer to bytes in |segment|'s contents. |
|
417 // On success, return true; if a problem occurs, report it and return false. |
|
418 bool MapSegmentSections(const Segment &segment, SectionMap *section_map) |
|
419 const; |
|
420 |
|
421 private: |
|
422 // Used internally. |
|
423 class SegmentFinder; |
|
424 class SectionMapper; |
|
425 |
|
426 // We use this to report problems parsing the file's contents. (WEAK) |
|
427 Reporter *reporter_; |
|
428 |
|
429 // The contents of the Mach-O file we're parsing. We do not own the |
|
430 // storage it refers to. |
|
431 ByteBuffer buffer_; |
|
432 |
|
433 // True if this file is big-endian. |
|
434 bool big_endian_; |
|
435 |
|
436 // True if this file is a 64-bit Mach-O file. |
|
437 bool bits_64_; |
|
438 |
|
439 // This file's cpu type and subtype. |
|
440 cpu_type_t cpu_type_; // mach_header[_64].cputype |
|
441 cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype |
|
442 |
|
443 // This file's type. |
|
444 FileType file_type_; // mach_header[_64].filetype |
|
445 |
|
446 // The region of buffer_ occupied by load commands. |
|
447 ByteBuffer load_commands_; |
|
448 |
|
449 // The number of load commands in load_commands_. |
|
450 uint32_t load_command_count_; // mach_header[_64].ncmds |
|
451 |
|
452 // This file's header flags. |
|
453 FileFlags flags_; |
|
454 }; |
|
455 |
|
456 } // namespace mach_o |
|
457 } // namespace google_breakpad |
|
458 |
|
459 #endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_ |