michael@0: // Copyright (c) 2006, Google Inc.
michael@0: // All rights reserved.
michael@0: //
michael@0: // Redistribution and use in source and binary forms, with or without
michael@0: // modification, are permitted provided that the following conditions are
michael@0: // met:
michael@0: //
michael@0: //     * Redistributions of source code must retain the above copyright
michael@0: // notice, this list of conditions and the following disclaimer.
michael@0: //     * Redistributions in binary form must reproduce the above
michael@0: // copyright notice, this list of conditions and the following disclaimer
michael@0: // in the documentation and/or other materials provided with the
michael@0: // distribution.
michael@0: //     * Neither the name of Google Inc. nor the names of its
michael@0: // contributors may be used to endorse or promote products derived from
michael@0: // this software without specific prior written permission.
michael@0: //
michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: 
michael@0: // macho_id.cc: Functions to gather identifying information from a macho file
michael@0: //
michael@0: // See macho_id.h for documentation
michael@0: //
michael@0: // Author: Dan Waylonis
michael@0: 
michael@0: extern "C" {  // necessary for Leopard
michael@0:   #include <fcntl.h>
michael@0:   #include <mach-o/loader.h>
michael@0:   #include <mach-o/swap.h>
michael@0:   #include <stdio.h>
michael@0:   #include <stdlib.h>
michael@0:   #include <string.h>
michael@0:   #include <sys/time.h>
michael@0:   #include <sys/types.h>
michael@0:   #include <unistd.h>
michael@0: }
michael@0: 
michael@0: #include "common/mac/macho_id.h"
michael@0: #include "common/mac/macho_walker.h"
michael@0: #include "common/mac/macho_utilities.h"
michael@0: 
michael@0: namespace MacFileUtilities {
michael@0: 
michael@0: using google_breakpad::MD5Init;
michael@0: using google_breakpad::MD5Update;
michael@0: using google_breakpad::MD5Final;
michael@0: 
michael@0: MachoID::MachoID(const char *path)
michael@0:    : memory_(0),
michael@0:      memory_size_(0),
michael@0:      crc_(0), 
michael@0:      md5_context_(), 
michael@0:      update_function_(NULL) {
michael@0:   strlcpy(path_, path, sizeof(path_));
michael@0: }
michael@0: 
michael@0: MachoID::MachoID(const char *path, void *memory, size_t size)
michael@0:    : memory_(memory),
michael@0:      memory_size_(size),
michael@0:      crc_(0), 
michael@0:      md5_context_(), 
michael@0:      update_function_(NULL) {
michael@0:   strlcpy(path_, path, sizeof(path_));
michael@0: }
michael@0: 
michael@0: MachoID::~MachoID() {
michael@0: }
michael@0: 
michael@0: // The CRC info is from http://en.wikipedia.org/wiki/Adler-32
michael@0: // With optimizations from http://www.zlib.net/
michael@0: 
michael@0: // The largest prime smaller than 65536
michael@0: #define MOD_ADLER 65521
// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MOD_ADLER-1) <= 2^32-1
michael@0: #define MAX_BLOCK 5552
michael@0: 
michael@0: void MachoID::UpdateCRC(unsigned char *bytes, size_t size) {
michael@0: // Unrolled loops for summing
michael@0: #define DO1(buf,i)  {sum1 += (buf)[i]; sum2 += sum1;}
michael@0: #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
michael@0: #define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
michael@0: #define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
michael@0: #define DO16(buf)   DO8(buf,0); DO8(buf,8);
michael@0:   // Split up the crc
michael@0:   uint32_t sum1 = crc_ & 0xFFFF;
michael@0:   uint32_t sum2 = (crc_ >> 16) & 0xFFFF;
michael@0: 
michael@0:   // Do large blocks
michael@0:   while (size >= MAX_BLOCK) {
michael@0:     size -= MAX_BLOCK;
michael@0:     int block_count = MAX_BLOCK / 16;
michael@0:     do {
michael@0:       DO16(bytes);
michael@0:       bytes += 16;
michael@0:     } while (--block_count);
michael@0:     sum1 %= MOD_ADLER;
michael@0:     sum2 %= MOD_ADLER;
michael@0:   }
michael@0: 
michael@0:   // Do remaining bytes
michael@0:   if (size) {
michael@0:     while (size >= 16) {
michael@0:       size -= 16;
michael@0:       DO16(bytes);
michael@0:       bytes += 16;
michael@0:     }
michael@0:     while (size--) {
michael@0:       sum1 += *bytes++;
michael@0:       sum2 += sum1;
michael@0:     }
michael@0:     sum1 %= MOD_ADLER;
michael@0:     sum2 %= MOD_ADLER;
michael@0:     crc_ = (sum2 << 16) | sum1;
michael@0:   }
michael@0: }
michael@0: 
michael@0: void MachoID::UpdateMD5(unsigned char *bytes, size_t size) {
michael@0:   MD5Update(&md5_context_, bytes, size);
michael@0: }
michael@0: 
michael@0: void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) {
michael@0:   if (!update_function_ || !size)
michael@0:     return;
michael@0: 
michael@0:   // Read up to 4k bytes at a time
michael@0:   unsigned char buffer[4096];
michael@0:   size_t buffer_size;
michael@0:   off_t file_offset = offset;
michael@0:   while (size > 0) {
michael@0:     if (size > sizeof(buffer)) {
michael@0:       buffer_size = sizeof(buffer);
michael@0:       size -= buffer_size;
michael@0:     } else {
michael@0:       buffer_size = size;
michael@0:       size = 0;
michael@0:     }
michael@0: 
michael@0:     if (!walker->ReadBytes(buffer, buffer_size, file_offset))
michael@0:       return;
michael@0: 
michael@0:     (this->*update_function_)(buffer, buffer_size);
michael@0:     file_offset += buffer_size;
michael@0:   }
michael@0: }
michael@0: 
michael@0: bool MachoID::UUIDCommand(cpu_type_t cpu_type,
michael@0:                           cpu_subtype_t cpu_subtype,
michael@0:                           unsigned char bytes[16]) {
michael@0:   struct breakpad_uuid_command uuid_cmd;
michael@0:   uuid_cmd.cmd = 0;
michael@0:   if (!WalkHeader(cpu_type, cpu_subtype, UUIDWalkerCB, &uuid_cmd))
michael@0:     return false;
michael@0: 
michael@0:   // If we found the command, we'll have initialized the uuid_command
michael@0:   // structure
michael@0:   if (uuid_cmd.cmd == LC_UUID) {
michael@0:     memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid));
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: bool MachoID::IDCommand(cpu_type_t cpu_type,
michael@0:                         cpu_subtype_t cpu_subtype,
michael@0:                         unsigned char identifier[16]) {
michael@0:   struct dylib_command dylib_cmd;
michael@0:   dylib_cmd.cmd = 0;
michael@0:   if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd))
michael@0:     return false;
michael@0: 
michael@0:   // If we found the command, we'll have initialized the dylib_command
michael@0:   // structure
michael@0:   if (dylib_cmd.cmd == LC_ID_DYLIB) {
michael@0:     // Take the hashed filename, version, and compatability version bytes
michael@0:     // to form the first 12 bytes, pad the rest with zeros
michael@0: 
michael@0:     // create a crude hash of the filename to generate the first 4 bytes
michael@0:     identifier[0] = 0;
michael@0:     identifier[1] = 0;
michael@0:     identifier[2] = 0;
michael@0:     identifier[3] = 0;
michael@0: 
michael@0:     for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) {
michael@0:       identifier[j%4] += path_[i];
michael@0:     }
michael@0: 
michael@0:     identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF;
michael@0:     identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF;
michael@0:     identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF;
michael@0:     identifier[7] = dylib_cmd.dylib.current_version & 0xFF;
michael@0:     identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF;
michael@0:     identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF;
michael@0:     identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF;
michael@0:     identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF;
michael@0:     identifier[12] = (cpu_type >> 24) & 0xFF;
michael@0:     identifier[13] = (cpu_type >> 16) & 0xFF;
michael@0:     identifier[14] = (cpu_type >> 8) & 0xFF;
michael@0:     identifier[15] = cpu_type & 0xFF;
michael@0: 
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
michael@0:   update_function_ = &MachoID::UpdateCRC;
michael@0:   crc_ = 0;
michael@0: 
michael@0:   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
michael@0:     return 0;
michael@0: 
michael@0:   return crc_;
michael@0: }
michael@0: 
michael@0: bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) {
michael@0:   update_function_ = &MachoID::UpdateMD5;
michael@0: 
michael@0:   MD5Init(&md5_context_);
michael@0: 
michael@0:   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
michael@0:     return false;
michael@0: 
michael@0:   MD5Final(identifier, &md5_context_);
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: bool MachoID::WalkHeader(cpu_type_t cpu_type,
michael@0:                          cpu_subtype_t cpu_subtype,
michael@0:                          MachoWalker::LoadCommandCallback callback,
michael@0:                          void *context) {
michael@0:   if (memory_) {
michael@0:     MachoWalker walker(memory_, memory_size_, callback, context);
michael@0:     return walker.WalkHeader(cpu_type, cpu_subtype);
michael@0:   } else {
michael@0:     MachoWalker walker(path_, callback, context);
michael@0:     return walker.WalkHeader(cpu_type, cpu_subtype);
michael@0:   }
michael@0: }
michael@0: 
michael@0: // static
michael@0: bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
michael@0:                        bool swap, void *context) {
michael@0:   MachoID *macho_id = (MachoID *)context;
michael@0: 
michael@0:   if (cmd->cmd == LC_SEGMENT) {
michael@0:     struct segment_command seg;
michael@0: 
michael@0:     if (!walker->ReadBytes(&seg, sizeof(seg), offset))
michael@0:       return false;
michael@0: 
michael@0:     if (swap)
michael@0:       swap_segment_command(&seg, NXHostByteOrder());
michael@0: 
michael@0:     struct mach_header_64 header;
michael@0:     off_t header_offset;
michael@0:     
michael@0:     if (!walker->CurrentHeader(&header, &header_offset))
michael@0:       return false;
michael@0:         
michael@0:     // Process segments that have sections:
michael@0:     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
michael@0:     offset += sizeof(struct segment_command);
michael@0:     struct section sec;
michael@0:     for (unsigned long i = 0; i < seg.nsects; ++i) {
michael@0:       if (!walker->ReadBytes(&sec, sizeof(sec), offset))
michael@0:         return false;
michael@0: 
michael@0:       if (swap)
michael@0:         swap_section(&sec, 1, NXHostByteOrder());
michael@0: 
michael@0:       // sections of type S_ZEROFILL are "virtual" and contain no data
michael@0:       // in the file itself
michael@0:       if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0)
michael@0:         macho_id->Update(walker, header_offset + sec.offset, sec.size);
michael@0: 
michael@0:       offset += sizeof(struct section);
michael@0:     }
michael@0:   } else if (cmd->cmd == LC_SEGMENT_64) {
michael@0:     struct segment_command_64 seg64;
michael@0: 
michael@0:     if (!walker->ReadBytes(&seg64, sizeof(seg64), offset))
michael@0:       return false;
michael@0: 
michael@0:     if (swap)
michael@0:       breakpad_swap_segment_command_64(&seg64, NXHostByteOrder());
michael@0: 
michael@0:     struct mach_header_64 header;
michael@0:     off_t header_offset;
michael@0:     
michael@0:     if (!walker->CurrentHeader(&header, &header_offset))
michael@0:       return false;
michael@0:     
michael@0:     // Process segments that have sections:
michael@0:     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
michael@0:     offset += sizeof(struct segment_command_64);
michael@0:     struct section_64 sec64;
michael@0:     for (unsigned long i = 0; i < seg64.nsects; ++i) {
michael@0:       if (!walker->ReadBytes(&sec64, sizeof(sec64), offset))
michael@0:         return false;
michael@0: 
michael@0:       if (swap)
michael@0:         breakpad_swap_section_64(&sec64, 1, NXHostByteOrder());
michael@0: 
michael@0:       // sections of type S_ZEROFILL are "virtual" and contain no data
michael@0:       // in the file itself
michael@0:       if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0)
michael@0:         macho_id->Update(walker, 
michael@0:                          header_offset + sec64.offset, 
michael@0:                          (size_t)sec64.size);
michael@0: 
michael@0:       offset += sizeof(struct section_64);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // Continue processing
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // static
michael@0: bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
michael@0:                            bool swap, void *context) {
michael@0:   if (cmd->cmd == LC_UUID) {
michael@0:     struct breakpad_uuid_command *uuid_cmd =
michael@0:       (struct breakpad_uuid_command *)context;
michael@0: 
michael@0:     if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command),
michael@0:                            offset))
michael@0:       return false;
michael@0: 
michael@0:     if (swap)
michael@0:       breakpad_swap_uuid_command(uuid_cmd, NXHostByteOrder());
michael@0: 
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   // Continue processing
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // static
michael@0: bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
michael@0:                          bool swap, void *context) {
michael@0:   if (cmd->cmd == LC_ID_DYLIB) {
michael@0:     struct dylib_command *dylib_cmd = (struct dylib_command *)context;
michael@0: 
michael@0:     if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset))
michael@0:       return false;
michael@0: 
michael@0:     if (swap)
michael@0:       swap_dylib_command(dylib_cmd, NXHostByteOrder());
michael@0: 
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   // Continue processing
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: }  // namespace MacFileUtilities