#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
#                       <debug info files or dirs>
# Runs dump_syms on each debug info file specified on the command line,
# then places the resulting symbol file in the proper directory
# structure in the symbol store path. Accepts multiple files
# on the command line, so it can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
# But really, you might just want to pass it <dir>.
#
# Parameters accepted:
#   -c           : Copy debug info files to the same directory structure
#                  as sym files
#   -a "<archs>" : Run dump_syms -a <arch> for each space-separated
#                  cpu architecture in <archs> (only on OS X)
#   -s <srcdir>  : Use <srcdir> as the top source directory to
#                  generate relative filenames.
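#
# Example invocation (paths are illustrative):
#   python symbolstore.py -c -s /path/to/srcdir ./dump_syms /path/to/symbols dist/bin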

import sys
import platform
import os
import re
import shutil
import textwrap
import fnmatch
import subprocess
import urlparse
import multiprocessing
import collections
from optparse import OptionParser
from xml.dom.minidom import parse

# Utility classes

class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        The attributes are generated by a single call to the GetRoot,
        GetRevision, and GetFilename methods. Those methods are explicitly not
        implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so setting self.[attr] will prevent future calls to the GetRoot,
            GetRevision, and GetFilename methods. We don't set the values on
            failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError


# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
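# For example (illustrative):
#   rootRegex.match('svn+ssh://user:pass@foo.com/bar').group(1) == 'foo.com/bar'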

def read_output(*args):
    (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate()
    return stdout.rstrip()

class HGRepoInfo:
    def __init__(self, path):
        self.path = path
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path. If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s. This is either not a clone of the web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return HGFileInfo(file, self)

class HGFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.root

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file

class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.
    """
    def __init__(self, path, rev, root):
        self.path = path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s (%s). This is either not a clone of a web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return GitFileInfo(file, self)

class GitFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file

# Utility functions

# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
vcsFileInfoCache = {}

def IsInDir(file, dir):
    # the lower() is to handle win32+vc8, where
    # the source filenames come out all lowercase,
    # but the srcdir can be mixed case
    return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())

def GetVCSFilenameFromSrcdir(file, srcdir):
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, '.hg')):
            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)

def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    (path, filename) = os.path.split(file)
    if path == '' or filename == '':
        return (file, None)

    fileInfo = None
    root = ''
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
        fileInfo = vcsFileInfoCache[file]
    else:
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)

def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    return {'Windows': Dumper_Win32,
            'Microsoft': Dumper_Win32,
            'Linux': Dumper_Linux,
            'SunOS': Dumper_Solaris,
            'Darwin': Dumper_Mac}[platform.system()](**kwargs)

def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    pdbStreamFile = open(outputPath, "w")
    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
    pdbStreamFile.write(vcs_root)
    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
    pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    pdbStreamFile.close()
    return result
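
# Each line in |fileStream| above has the form (values illustrative):
#   c:/build/src/foo.cpp*mozilla-central/foo.cpp*0123456789ab
# i.e. local source path, repo-relative path, and revision; the second and
# third fields feed %var2% and %var3% in HTTP_EXTRACT_TARGET.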

def WorkerInitializer(cls, lock, srcdirRepoInfo):
    """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
    won't inherit the class variables from the parent. They only need a few variables,
    so we run an initializer to set them. Redundant but harmless on other platforms."""
    cls.lock = lock
    cls.srcdirRepoInfo = srcdirRepoInfo

def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)

class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server. Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir. Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
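
    # A minimal usage sketch (mirroring main() below; values illustrative):
    #   Dumper.GlobalInit()
    #   dumper = GetPlatformSpecificDumper(dump_syms="./dump_syms",
    #                                      symbol_path="/path/to/symbols")
    #   dumper.Process("dist/bin")
    #   dumper.Finish()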
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        num_cpus = module.cpu_count()
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.srcdirRepoInfo = Dumper.manager.dict()
        cls.pool = module.Pool(num_cpus, WorkerInitializer,
                               (cls, cls.lock, cls.srcdirRepoInfo))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        def ensure_slash(u):
            if not u.endswith("/"):
                return u + "/"
            return u
        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                print "Skipping project %s" % proj.toxml()
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory. Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of submitted jobs."""
        self.JobStarted(file_key)
        res = Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up.
        All files after the first are fallbacks in case the first file does not process
        successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            self.files_record[files] = 0 # record that we submitted jobs for this tuple of files
            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = proc.stdout.next()
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # sets vcs_root in case the loop through files ends on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                    # mark this file as having been dumped successfully
                    result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result
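
    # For reference, each dumped symbol file lands under
    #   <symbol_path>/<debug_file>/<GUID>/<debug_file minus .pdb>.sym
    # e.g. firefox.pdb/0123456789ABCDEF0123456789ABCDEF0/firefox.sym
    # (the GUID shown is illustrative).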

# Platform-specific subclasses. For the most part, these just have
# logic to determine what files to extract symbols from.

class Dumper_Win32(Dumper):
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase. If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:","w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result

class Dumper_Linux(Dumper):
    objcopy = os.environ.get('OBJCOPY', 'objcopy')

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files
            os.system("gzip %s" % full_path)
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)

class Dumper_Solaris(Dumper):
    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            output = os.popen("file " + file).read()
            return output.split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWorkMac(file)

def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    shutil.rmtree(dsymbundle)

class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null","w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file,)
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null","w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)

# Entry point if called as a standalone program
def main():
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space-separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check to see if pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        if not pdbstr or not os.path.exists(pdbstr):
            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
            sys.exit(1)

    if len(args) < 3:
        parser.error("not enough arguments")
        exit(1)

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()

# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()