// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: michael@0: // macho_id.cc: Functions to gather identifying information from a macho file michael@0: // michael@0: // See macho_id.h for documentation michael@0: // michael@0: // Author: Dan Waylonis michael@0: michael@0: extern "C" { // necessary for Leopard michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: } michael@0: michael@0: #include "common/mac/macho_id.h" michael@0: #include "common/mac/macho_walker.h" michael@0: #include "common/mac/macho_utilities.h" michael@0: michael@0: namespace MacFileUtilities { michael@0: michael@0: using google_breakpad::MD5Init; michael@0: using google_breakpad::MD5Update; michael@0: using google_breakpad::MD5Final; michael@0: michael@0: MachoID::MachoID(const char *path) michael@0: : memory_(0), michael@0: memory_size_(0), michael@0: crc_(0), michael@0: md5_context_(), michael@0: update_function_(NULL) { michael@0: strlcpy(path_, path, sizeof(path_)); michael@0: } michael@0: michael@0: MachoID::MachoID(const char *path, void *memory, size_t size) michael@0: : memory_(memory), michael@0: memory_size_(size), michael@0: crc_(0), michael@0: md5_context_(), michael@0: update_function_(NULL) { michael@0: strlcpy(path_, path, sizeof(path_)); michael@0: } michael@0: michael@0: MachoID::~MachoID() { michael@0: } michael@0: michael@0: // The CRC info is from http://en.wikipedia.org/wiki/Adler-32 michael@0: // With optimizations from http://www.zlib.net/ michael@0: michael@0: // The largest prime smaller than 65536 michael@0: #define MOD_ADLER 65521 michael@0: // MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MAX_BLOCK-1) <= 2^32-1 michael@0: #define MAX_BLOCK 5552 michael@0: michael@0: void MachoID::UpdateCRC(unsigned char *bytes, size_t size) { michael@0: // Unrolled loops for summing michael@0: #define DO1(buf,i) {sum1 += (buf)[i]; sum2 += sum1;} michael@0: #define DO2(buf,i) 
DO1(buf,i); DO1(buf,i+1); michael@0: #define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); michael@0: #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); michael@0: #define DO16(buf) DO8(buf,0); DO8(buf,8); michael@0: // Split up the crc michael@0: uint32_t sum1 = crc_ & 0xFFFF; michael@0: uint32_t sum2 = (crc_ >> 16) & 0xFFFF; michael@0: michael@0: // Do large blocks michael@0: while (size >= MAX_BLOCK) { michael@0: size -= MAX_BLOCK; michael@0: int block_count = MAX_BLOCK / 16; michael@0: do { michael@0: DO16(bytes); michael@0: bytes += 16; michael@0: } while (--block_count); michael@0: sum1 %= MOD_ADLER; michael@0: sum2 %= MOD_ADLER; michael@0: } michael@0: michael@0: // Do remaining bytes michael@0: if (size) { michael@0: while (size >= 16) { michael@0: size -= 16; michael@0: DO16(bytes); michael@0: bytes += 16; michael@0: } michael@0: while (size--) { michael@0: sum1 += *bytes++; michael@0: sum2 += sum1; michael@0: } michael@0: sum1 %= MOD_ADLER; michael@0: sum2 %= MOD_ADLER; michael@0: crc_ = (sum2 << 16) | sum1; michael@0: } michael@0: } michael@0: michael@0: void MachoID::UpdateMD5(unsigned char *bytes, size_t size) { michael@0: MD5Update(&md5_context_, bytes, size); michael@0: } michael@0: michael@0: void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) { michael@0: if (!update_function_ || !size) michael@0: return; michael@0: michael@0: // Read up to 4k bytes at a time michael@0: unsigned char buffer[4096]; michael@0: size_t buffer_size; michael@0: off_t file_offset = offset; michael@0: while (size > 0) { michael@0: if (size > sizeof(buffer)) { michael@0: buffer_size = sizeof(buffer); michael@0: size -= buffer_size; michael@0: } else { michael@0: buffer_size = size; michael@0: size = 0; michael@0: } michael@0: michael@0: if (!walker->ReadBytes(buffer, buffer_size, file_offset)) michael@0: return; michael@0: michael@0: (this->*update_function_)(buffer, buffer_size); michael@0: file_offset += buffer_size; michael@0: } michael@0: } michael@0: michael@0: bool 
MachoID::UUIDCommand(cpu_type_t cpu_type, michael@0: cpu_subtype_t cpu_subtype, michael@0: unsigned char bytes[16]) { michael@0: struct breakpad_uuid_command uuid_cmd; michael@0: uuid_cmd.cmd = 0; michael@0: if (!WalkHeader(cpu_type, cpu_subtype, UUIDWalkerCB, &uuid_cmd)) michael@0: return false; michael@0: michael@0: // If we found the command, we'll have initialized the uuid_command michael@0: // structure michael@0: if (uuid_cmd.cmd == LC_UUID) { michael@0: memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid)); michael@0: return true; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool MachoID::IDCommand(cpu_type_t cpu_type, michael@0: cpu_subtype_t cpu_subtype, michael@0: unsigned char identifier[16]) { michael@0: struct dylib_command dylib_cmd; michael@0: dylib_cmd.cmd = 0; michael@0: if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd)) michael@0: return false; michael@0: michael@0: // If we found the command, we'll have initialized the dylib_command michael@0: // structure michael@0: if (dylib_cmd.cmd == LC_ID_DYLIB) { michael@0: // Take the hashed filename, version, and compatability version bytes michael@0: // to form the first 12 bytes, pad the rest with zeros michael@0: michael@0: // create a crude hash of the filename to generate the first 4 bytes michael@0: identifier[0] = 0; michael@0: identifier[1] = 0; michael@0: identifier[2] = 0; michael@0: identifier[3] = 0; michael@0: michael@0: for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) { michael@0: identifier[j%4] += path_[i]; michael@0: } michael@0: michael@0: identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF; michael@0: identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF; michael@0: identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF; michael@0: identifier[7] = dylib_cmd.dylib.current_version & 0xFF; michael@0: identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF; michael@0: 
identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF; michael@0: identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF; michael@0: identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF; michael@0: identifier[12] = (cpu_type >> 24) & 0xFF; michael@0: identifier[13] = (cpu_type >> 16) & 0xFF; michael@0: identifier[14] = (cpu_type >> 8) & 0xFF; michael@0: identifier[15] = cpu_type & 0xFF; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) { michael@0: update_function_ = &MachoID::UpdateCRC; michael@0: crc_ = 0; michael@0: michael@0: if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this)) michael@0: return 0; michael@0: michael@0: return crc_; michael@0: } michael@0: michael@0: bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) { michael@0: update_function_ = &MachoID::UpdateMD5; michael@0: michael@0: MD5Init(&md5_context_); michael@0: michael@0: if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this)) michael@0: return false; michael@0: michael@0: MD5Final(identifier, &md5_context_); michael@0: return true; michael@0: } michael@0: michael@0: bool MachoID::WalkHeader(cpu_type_t cpu_type, michael@0: cpu_subtype_t cpu_subtype, michael@0: MachoWalker::LoadCommandCallback callback, michael@0: void *context) { michael@0: if (memory_) { michael@0: MachoWalker walker(memory_, memory_size_, callback, context); michael@0: return walker.WalkHeader(cpu_type, cpu_subtype); michael@0: } else { michael@0: MachoWalker walker(path_, callback, context); michael@0: return walker.WalkHeader(cpu_type, cpu_subtype); michael@0: } michael@0: } michael@0: michael@0: // static michael@0: bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, michael@0: bool swap, void *context) { michael@0: MachoID *macho_id = (MachoID *)context; michael@0: michael@0: 
if (cmd->cmd == LC_SEGMENT) { michael@0: struct segment_command seg; michael@0: michael@0: if (!walker->ReadBytes(&seg, sizeof(seg), offset)) michael@0: return false; michael@0: michael@0: if (swap) michael@0: swap_segment_command(&seg, NXHostByteOrder()); michael@0: michael@0: struct mach_header_64 header; michael@0: off_t header_offset; michael@0: michael@0: if (!walker->CurrentHeader(&header, &header_offset)) michael@0: return false; michael@0: michael@0: // Process segments that have sections: michael@0: // (e.g., __TEXT, __DATA, __IMPORT, __OBJC) michael@0: offset += sizeof(struct segment_command); michael@0: struct section sec; michael@0: for (unsigned long i = 0; i < seg.nsects; ++i) { michael@0: if (!walker->ReadBytes(&sec, sizeof(sec), offset)) michael@0: return false; michael@0: michael@0: if (swap) michael@0: swap_section(&sec, 1, NXHostByteOrder()); michael@0: michael@0: // sections of type S_ZEROFILL are "virtual" and contain no data michael@0: // in the file itself michael@0: if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0) michael@0: macho_id->Update(walker, header_offset + sec.offset, sec.size); michael@0: michael@0: offset += sizeof(struct section); michael@0: } michael@0: } else if (cmd->cmd == LC_SEGMENT_64) { michael@0: struct segment_command_64 seg64; michael@0: michael@0: if (!walker->ReadBytes(&seg64, sizeof(seg64), offset)) michael@0: return false; michael@0: michael@0: if (swap) michael@0: breakpad_swap_segment_command_64(&seg64, NXHostByteOrder()); michael@0: michael@0: struct mach_header_64 header; michael@0: off_t header_offset; michael@0: michael@0: if (!walker->CurrentHeader(&header, &header_offset)) michael@0: return false; michael@0: michael@0: // Process segments that have sections: michael@0: // (e.g., __TEXT, __DATA, __IMPORT, __OBJC) michael@0: offset += sizeof(struct segment_command_64); michael@0: struct section_64 sec64; michael@0: for (unsigned long i = 0; i < seg64.nsects; ++i) { michael@0: if 
(!walker->ReadBytes(&sec64, sizeof(sec64), offset)) michael@0: return false; michael@0: michael@0: if (swap) michael@0: breakpad_swap_section_64(&sec64, 1, NXHostByteOrder()); michael@0: michael@0: // sections of type S_ZEROFILL are "virtual" and contain no data michael@0: // in the file itself michael@0: if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0) michael@0: macho_id->Update(walker, michael@0: header_offset + sec64.offset, michael@0: (size_t)sec64.size); michael@0: michael@0: offset += sizeof(struct section_64); michael@0: } michael@0: } michael@0: michael@0: // Continue processing michael@0: return true; michael@0: } michael@0: michael@0: // static michael@0: bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, michael@0: bool swap, void *context) { michael@0: if (cmd->cmd == LC_UUID) { michael@0: struct breakpad_uuid_command *uuid_cmd = michael@0: (struct breakpad_uuid_command *)context; michael@0: michael@0: if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command), michael@0: offset)) michael@0: return false; michael@0: michael@0: if (swap) michael@0: breakpad_swap_uuid_command(uuid_cmd, NXHostByteOrder()); michael@0: michael@0: return false; michael@0: } michael@0: michael@0: // Continue processing michael@0: return true; michael@0: } michael@0: michael@0: // static michael@0: bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset, michael@0: bool swap, void *context) { michael@0: if (cmd->cmd == LC_ID_DYLIB) { michael@0: struct dylib_command *dylib_cmd = (struct dylib_command *)context; michael@0: michael@0: if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset)) michael@0: return false; michael@0: michael@0: if (swap) michael@0: swap_dylib_command(dylib_cmd, NXHostByteOrder()); michael@0: michael@0: return false; michael@0: } michael@0: michael@0: // Continue processing michael@0: return true; michael@0: } michael@0: michael@0: } // namespace 
MacFileUtilities