// -*- mode: c++ -*-

// Copyright (c) 2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: Jim Blandy

// dump_syms.mm: Create a symbol file for use with minidumps

#include "common/mac/dump_syms.h"

#include <Foundation/Foundation.h>
#include <assert.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <ostream>
#include <string>
#include <vector>

#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/mac/file_id.h"
#include "common/mac/arch_utilities.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/scoped_ptr.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"
#include "common/symbol_data.h"

#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif  // CPU_TYPE_ARM

using dwarf2reader::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::FileID;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using google_breakpad::scoped_ptr;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;

namespace google_breakpad {

// Prepare to dump symbols from FILENAME, which may name either a Mach-O
// (possibly fat) file or a dSYM bundle wrapping one. On success the file's
// bytes are held in contents_, the per-architecture table is copied into
// object_files_, and true is returned. On failure a message is printed to
// stderr (directly, or by the FatReader's reporter) and false is returned.
bool DumpSymbols::Read(NSString *filename) {
  if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) {
    fprintf(stderr, "Object file does not exist: %s\n",
            [filename fileSystemRepresentation]);
    return false;
  }

  input_pathname_ = [filename retain];

  // Does this filename refer to a dSYM bundle?
  NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];

  if (bundle) {
    // Filenames referring to bundles usually have names of the form
    // "<NAME>.dSYM"; however, if the user has specified a wrapper
    // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings),
    // then the name may have the form "<NAME>.<WRAPPER_SUFFIX>.dSYM". In
    // either case, the resource name for the file containing the DWARF
    // info within the bundle is <NAME>.
    //
    // Since there's no way to tell how much to strip off, remove one
    // extension at a time, and use the first one that
    // pathForResource:ofType:inDirectory likes.
    NSString *base_name = [input_pathname_ lastPathComponent];
    NSString *dwarf_resource;

    do {
      NSString *new_base_name = [base_name stringByDeletingPathExtension];

      // If stringByDeletingPathExtension returned the name unchanged, then
      // there's nothing more for us to strip off --- lose.
      if ([new_base_name isEqualToString:base_name]) {
        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
                [input_pathname_ fileSystemRepresentation]);
        return false;
      }

      // Take the shortened result as our new base_name.
      base_name = new_base_name;

      // Try to find a DWARF resource in the bundle under the new base_name.
      dwarf_resource = [bundle pathForResource:base_name
                                        ofType:nil
                                   inDirectory:@"DWARF"];
    } while (!dwarf_resource);

    object_filename_ = [dwarf_resource retain];
  } else {
    object_filename_ = [input_pathname_ retain];
  }

  // Read the file's contents into memory.
  //
  // The documentation for dataWithContentsOfMappedFile says:
  //
  //     Because of file mapping restrictions, this method should only be
  //     used if the file is guaranteed to exist for the duration of the
  //     data object's existence. It is generally safer to use the
  //     dataWithContentsOfFile: method.
  //
  // I gather this means that OS X doesn't have (or at least, that method
  // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the
  // process appears to get its own copy of the data, and changes to the
  // file don't affect memory and vice versa).
  //
  // The error pointer starts out nil so that the failure path below never
  // reads an indeterminate pointer.
  NSError *error = nil;
  contents_ = [NSData dataWithContentsOfFile:object_filename_
                                     options:0
                                       error:&error];
  if (!contents_) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            [object_filename_ fileSystemRepresentation],
            [[error localizedDescription] UTF8String]);
    return false;
  }
  [contents_ retain];

  // Get the list of object files present in the file.
  FatReader::Reporter fat_reporter([object_filename_
                                       fileSystemRepresentation]);
  FatReader fat_reader(&fat_reporter);
  if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]),
                       [contents_ length])) {
    return false;
  }

  // Get our own copy of fat_reader's object file list.
  size_t object_files_count;
  const struct fat_arch *object_files =
      fat_reader.object_files(&object_files_count);
  if (object_files_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            [object_filename_ fileSystemRepresentation]);
    return false;
  }
  object_files_.resize(object_files_count);
  memcpy(&object_files_[0], object_files,
         sizeof(struct fat_arch) * object_files_count);

  return true;
}

// Select the object file in object_files_ that best matches CPU_TYPE and
// CPU_SUBTYPE, as judged by NXFindBestFatArch. Returns false, leaving the
// current selection untouched, if nothing matches.
bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
                                  cpu_subtype_t cpu_subtype) {
  // Find the best match for the architecture the user requested.
  const struct fat_arch *best_match
      = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
                          static_cast<uint32_t>(object_files_.size()));
  if (!best_match) return false;

  // Record the selected object file.
  selected_object_file_ = best_match;
  return true;
}

// Select an object file by architecture name. Returns false if ARCH_NAME
// is not a name BreakpadGetArchInfoFromName recognizes, or if the file
// holds no object file matching it.
bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
  const NXArchInfo *arch_info =
      google_breakpad::BreakpadGetArchInfoFromName(arch_name.c_str());
  if (!arch_info)
    return false;
  return SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
}

// Return the identifier FileID computes for the selected object file, with
// every '-' removed, or the empty string (after printing a message to
// stderr) if it cannot be computed. selected_object_file_ must be set.
string DumpSymbols::Identifier() {
  FileID file_id([object_filename_ fileSystemRepresentation]);
  unsigned char identifier_bytes[16];
  cpu_type_t cpu_type = selected_object_file_->cputype;
  cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
  if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            [object_filename_ fileSystemRepresentation]);
    return "";
  }

  char identifier_string[40];
  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
                                    sizeof(identifier_string));

  // Strip the dashes. After erasing the character at I, the search resumes
  // at I, which now holds the character that followed the dash.
  string compacted(identifier_string);
  for (size_t i = compacted.find('-'); i != string::npos;
       i = compacted.find('-', i))
    compacted.erase(i, 1);

  return compacted;
}

// A line-to-module loader that accepts line number info parsed by
// dwarf2reader::LineInfo and populates a Module and a line vector
// with the results.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleHandler {
 public:
  // Create a line-to-module converter using BYTE_READER.
  explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
      : byte_reader_(byte_reader) { }

  // Remember COMPILATION_DIR for use by subsequent ReadProgram calls.
  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  // Parse the LENGTH bytes of line number program at PROGRAM, handing the
  // results to a DwarfLineToModule built from MODULE, the current
  // compilation directory, and LINES.
  void ReadProgram(const char *program, uint64 length,
                   Module *module, vector<Module::Line> *lines) {
    DwarfLineToModule handler(module, compilation_dir_, lines);
    dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
    parser.Start();
  }
 private:
  string compilation_dir_;
  dwarf2reader::ByteReader *byte_reader_;  // WEAK
};

// Read the DWARF debugging information in DWARF_SECTIONS, belonging to the
// file MACHO_READER has parsed, into MODULE. Returns false, after printing
// a message to stderr, if there is no __debug_info section.
bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
                            const mach_o::Reader &macho_reader,
                            const mach_o::SectionMap &dwarf_sections) const {
  // Build a byte reader of the appropriate endianness.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? dwarf2reader::ENDIANNESS_BIG
                         : dwarf2reader::ENDIANNESS_LITTLE);

  // Construct a context for this file.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module);

  // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
       it != dwarf_sections.end(); ++it) {
    file_context.section_map[it->first] =
        make_pair(reinterpret_cast<const char *>(it->second.contents.start),
                  it->second.contents.Size());
  }

  // Find the __debug_info section.
  std::pair<const char *, uint64> debug_info_section
      = file_context.section_map["__debug_info"];
  // There had better be a __debug_info section!
  if (!debug_info_section.first) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return false;
  }

  // Build a line-to-module loader for the root handler to use.
  DumperLineToModule line_to_module(&byte_reader);

  // Walk the __debug_info section, one compilation unit at a time.
  uint64 debug_info_length = debug_info_section.second;
  for (uint64 offset = 0; offset < debug_info_length;) {
    // Make a handler for the root DIE that populates MODULE with the
    // debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    // Make a Dwarf2Handler that drives our DIEHandler.
    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    // Make a DWARF parser for the compilation unit at OFFSET.
    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map,
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Process the entire compilation unit; get the offset of the next.
    offset += dwarf_reader.Start();
  }

  return true;
}

// Read the call frame information in SECTION, belonging to the file
// MACHO_READER has parsed, into MODULE. EH_FRAME is passed through to the
// CFI parser to say which flavor of CFI SECTION holds. Returns false,
// after printing a message to stderr, if there is no register name table
// for the file's CPU type.
bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
                          const mach_o::Reader &macho_reader,
                          const mach_o::Section &section,
                          bool eh_frame) const {
  // Find the appropriate set of register names for this file's
  // architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    default: {
      const NXArchInfo *arch = google_breakpad::BreakpadGetArchInfoFromCpuType(
          macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
              selected_object_name_.c_str());
      if (arch)
        fprintf(stderr, "architecture '%s'", arch->name);
      else
        fprintf(stderr, "architecture %d,%d",
                macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, " to Breakpad symbol file: no register name table\n");
      return false;
    }
  }

  // Find the call frame information and its size.
  const char *cfi = reinterpret_cast<const char *>(section.contents.start);
  size_t cfi_size = section.contents.Size();

  // Plug together the parser, handler, and their entourages.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);
  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
                                       dwarf2reader::ENDIANNESS_BIG :
                                       dwarf2reader::ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi);

  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
                                     &byte_reader, &handler, &dwarf_reporter,
                                     eh_frame);
  parser.Start();
  return true;
}

// A LoadCommandHandler that loads whatever debugging data it finds into a
// Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // Create a load command dumper handling load commands from READER's
  // file, and adding data to MODULE. SYMBOL_DATA selects which kinds of
  // data (CFI and/or the rest) are collected.
  LoadCommandDumper(const DumpSymbols &dumper,
                    google_breakpad::Module *module,
                    const mach_o::Reader &reader,
                    SymbolData symbol_data)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data) { }

  bool SegmentCommand(const mach_o::Segment &segment);
  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);

 private:
  const DumpSymbols &dumper_;
  google_breakpad::Module *module_;  // WEAK
  const mach_o::Reader &reader_;
  const SymbolData symbol_data_;
};

// Handle one segment load command. For __TEXT, record the load address and
// (unless CFI is excluded) read __eh_frame; for __DWARF, read the DWARF
// info and/or __debug_frame as symbol_data_ dictates. Returns false if the
// segment's sections cannot be mapped or the DWARF info cannot be read.
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
  mach_o::SectionMap section_map;
  if (!reader_.MapSegmentSections(segment, &section_map))
    return false;

  if (segment.name == "__TEXT") {
    // The load address applies to every kind of record the module holds,
    // so record it even when CFI is not being dumped; only the __eh_frame
    // read is conditional on symbol_data_.
    module_->SetLoadAddress(segment.vmaddr);
    if (symbol_data_ != NO_CFI) {
      mach_o::SectionMap::const_iterator eh_frame =
          section_map.find("__eh_frame");
      if (eh_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal
        // error.
        dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
      }
    }
    return true;
  }

  if (segment.name == "__DWARF") {
    if (symbol_data_ != ONLY_CFI) {
      if (!dumper_.ReadDwarf(module_, reader_, section_map))
        return false;
    }
    if (symbol_data_ != NO_CFI) {
      mach_o::SectionMap::const_iterator debug_frame
          = section_map.find("__debug_frame");
      if (debug_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal
        // error.
        dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
      }
    }
  }

  return true;
}

// Handle the symbol table load command: feed the STABS entries in ENTRIES,
// with string table STRINGS, into the module. Returns false if the STABS
// reader fails to process them.
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
                                                   const ByteBuffer &strings) {
  StabsToModule stabs_to_module(module_);
  // Mac OS X STABS are never "unitized", and the size of the 'value' field
  // matches the address size of the executable.
  StabsReader stabs_reader(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           reader_.bits_64() ? 8 : 4,
                           true,
                           &stabs_to_module);
  if (!stabs_reader.Process())
    return false;
  stabs_to_module.Finalize();
  return true;
}

// Read the selected object file's debugging data into a freshly allocated
// Module and store it in *OUT_MODULE; the caller takes ownership. If no
// architecture has been selected, pick the file's only architecture, or
// else the one matching the machine we are running on. Returns false,
// leaving *OUT_MODULE untouched, on any failure.
bool DumpSymbols::ReadSymbolData(Module** out_module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own. If the
      // local architecture cannot be determined, nothing can match it.
      const NXArchInfo *local_arch = NXGetLocalArchInfo();
      if (!local_arch ||
          !SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                [object_filename_ fileSystemRepresentation]);
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const NXArchInfo *selected_arch_info =
      google_breakpad::BreakpadGetArchInfoFromCpuType(
          selected_object_file_->cputype, selected_object_file_->cpusubtype);
  // The lookup can come back empty (ReadCFI checks for that too); without
  // a name there is nothing to put in the MODULE record.
  if (!selected_arch_info) {
    fprintf(stderr, "%s: unrecognized architecture %d,%d\n",
            [object_filename_ fileSystemRepresentation],
            selected_object_file_->cputype,
            selected_object_file_->cpusubtype);
    return false;
  }

  const char *selected_arch_name = selected_arch_info->name;
  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = [object_filename_ UTF8String];
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // Append (not merely compute) the architecture name.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  NSString *module_name = [object_filename_ lastPathComponent];

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;
  identifier += "0";

  // Create a module to hold the debugging information.
  scoped_ptr<Module> module(new Module([module_name UTF8String],
                                       "mac",
                                       selected_arch_name,
                                       identifier));

  // Parse the selected object file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
                                        symbol_data_);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  *out_module = module.release();

  return true;
}

// Read the selected object file's symbol data and write it to STREAM as a
// Breakpad symbol file. Returns true only if both the read and the write
// succeed.
bool DumpSymbols::WriteSymbolFile(std::ostream &stream) {
  Module* module = NULL;
  if (!ReadSymbolData(&module) || !module)
    return false;

  const bool written = module->Write(stream, symbol_data_);
  delete module;
  return written;
}

}  // namespace google_breakpad