#!/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
#                       <debug info files or dirs>
#   Runs dump_syms on each debug info file specified on the command line,
#   then places the resulting symbol file in the proper directory
#   structure in the symbol store path. Accepts multiple files
#   on the command line, so can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
#   But really, you might just want to pass it <dir>.
#
#   Parameters accepted:
#     -c           : Copy debug info files to the same directory structure
#                    as sym files
#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
#                    cpu architecture in <archs> (only on OS X)
#     -s <srcdir>  : Use <srcdir> as the top source directory to
#                    generate relative filenames.

import sys
import platform
import os
import re
import shutil
import textwrap
import fnmatch
import subprocess
import urlparse
import multiprocessing
import collections
from optparse import OptionParser
from xml.dom.minidom import parse

# Utility classes

class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        The attributes are generated by a single call to the GetRoot,
        GetRevision, and GetFilename methods. Those methods are explicitly not
        implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so setting self.[attr] will prevent future calls to the GetRoot,
            GetRevision, and GetFilename methods. We don't set the values on
            failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError

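# A hedged illustration (not part of the original module): because of the
# __getattr__ caching above, a hypothetical subclass computes each attribute
# at most once per instance:
#
#   class StaticFileInfo(VCSFileInfo):
#       def GetRoot(self):
#           print "computing root"    # would run only on the first access
#           return "example.org/repo"
#
#   info = StaticFileInfo("foo.c")
#   info.root    # calls GetRoot(), caches the result on the instance
#   info.root    # served from the instance dict; GetRoot() is not called again
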
""" michael@0: raise NotImplementedError michael@0: michael@0: def GetCleanRoot(self): michael@0: """ This method should return the repository root for the file or 'None' michael@0: on failure. """ michael@0: raise NotImplementedErrors michael@0: michael@0: def GetRevision(self): michael@0: """ This method should return the revision number for the file or 'None' michael@0: on failure. """ michael@0: raise NotImplementedError michael@0: michael@0: def GetFilename(self): michael@0: """ This method should return the repository-specific filename for the michael@0: file or 'None' on failure. """ michael@0: raise NotImplementedError michael@0: michael@0: michael@0: # This regex separates protocol and optional username/password from a url. michael@0: # For instance, all the following urls will be transformed into michael@0: # 'foo.com/bar': michael@0: # michael@0: # http://foo.com/bar michael@0: # svn+ssh://user@foo.com/bar michael@0: # svn+ssh://user:pass@foo.com/bar michael@0: # michael@0: rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$') michael@0: michael@0: def read_output(*args): michael@0: (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate() michael@0: return stdout.rstrip() michael@0: michael@0: class HGRepoInfo: michael@0: def __init__(self, path): michael@0: self.path = path michael@0: rev = read_output('hg', '-R', path, michael@0: 'parent', '--template={node|short}') michael@0: # Look for the default hg path. If SRVSRV_ROOT is set, we michael@0: # don't bother asking hg. michael@0: hg_root = os.environ.get("SRCSRV_ROOT") michael@0: if hg_root: michael@0: root = hg_root michael@0: else: michael@0: root = read_output('hg', '-R', path, michael@0: 'showconfig', 'paths.default') michael@0: if not root: michael@0: print >> sys.stderr, "Failed to get HG Repo for %s" % path michael@0: cleanroot = None michael@0: if root: michael@0: match = rootRegex.match(root) michael@0: if match: michael@0: cleanroot = match.group(1) michael@0: if cleanroot.endswith('/'): michael@0: cleanroot = cleanroot[:-1] michael@0: if cleanroot is None: michael@0: print >> sys.stderr, textwrap.dedent("""\ michael@0: Could not determine repo info for %s. This is either not a clone of the web-based michael@0: repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path michael@0: sys.exit(1) michael@0: self.rev = rev michael@0: self.root = root michael@0: self.cleanroot = cleanroot michael@0: michael@0: def GetFileInfo(self, file): michael@0: return HGFileInfo(file, self) michael@0: michael@0: class HGFileInfo(VCSFileInfo): michael@0: def __init__(self, file, repo): michael@0: VCSFileInfo.__init__(self, file) michael@0: self.repo = repo michael@0: self.file = os.path.relpath(file, repo.path) michael@0: michael@0: def GetRoot(self): michael@0: return self.repo.root michael@0: michael@0: def GetCleanRoot(self): michael@0: return self.repo.cleanroot michael@0: michael@0: def GetRevision(self): michael@0: return self.repo.rev michael@0: michael@0: def GetFilename(self): michael@0: if self.revision and self.clean_root: michael@0: return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision) michael@0: return self.file michael@0: michael@0: class GitRepoInfo: michael@0: """ michael@0: Info about a local git repository. Does not currently michael@0: support discovering info about a git clone, the info must be michael@0: provided out-of-band. 
michael@0: """ michael@0: def __init__(self, path, rev, root): michael@0: self.path = path michael@0: cleanroot = None michael@0: if root: michael@0: match = rootRegex.match(root) michael@0: if match: michael@0: cleanroot = match.group(1) michael@0: if cleanroot.endswith('/'): michael@0: cleanroot = cleanroot[:-1] michael@0: if cleanroot is None: michael@0: print >> sys.stderr, textwrap.dedent("""\ michael@0: Could not determine repo info for %s (%s). This is either not a clone of a web-based michael@0: repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root) michael@0: sys.exit(1) michael@0: self.rev = rev michael@0: self.cleanroot = cleanroot michael@0: michael@0: def GetFileInfo(self, file): michael@0: return GitFileInfo(file, self) michael@0: michael@0: class GitFileInfo(VCSFileInfo): michael@0: def __init__(self, file, repo): michael@0: VCSFileInfo.__init__(self, file) michael@0: self.repo = repo michael@0: self.file = os.path.relpath(file, repo.path) michael@0: michael@0: def GetRoot(self): michael@0: return self.repo.path michael@0: michael@0: def GetCleanRoot(self): michael@0: return self.repo.cleanroot michael@0: michael@0: def GetRevision(self): michael@0: return self.repo.rev michael@0: michael@0: def GetFilename(self): michael@0: if self.revision and self.clean_root: michael@0: return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision) michael@0: return self.file michael@0: michael@0: # Utility functions michael@0: michael@0: # A cache of files for which VCS info has already been determined. Used to michael@0: # prevent extra filesystem activity or process launching. michael@0: vcsFileInfoCache = {} michael@0: michael@0: def IsInDir(file, dir): michael@0: # the lower() is to handle win32+vc8, where michael@0: # the source filenames come out all lowercase, michael@0: # but the srcdir can be mixed case michael@0: return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower()) michael@0: michael@0: def GetVCSFilenameFromSrcdir(file, srcdir): michael@0: if srcdir not in Dumper.srcdirRepoInfo: michael@0: # Not in cache, so find it adnd cache it michael@0: if os.path.isdir(os.path.join(srcdir, '.hg')): michael@0: Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir) michael@0: else: michael@0: # Unknown VCS or file is not in a repo. michael@0: return None michael@0: return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file) michael@0: michael@0: def GetVCSFilename(file, srcdirs): michael@0: """Given a full path to a file, and the top source directory, michael@0: look for version control information about this file, and return michael@0: a tuple containing michael@0: 1) a specially formatted filename that contains the VCS type, michael@0: VCS location, relative filename, and revision number, formatted like: michael@0: vcs:vcs location:filename:revision michael@0: For example: michael@0: cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36 michael@0: 2) the unmodified root information if it exists""" michael@0: (path, filename) = os.path.split(file) michael@0: if path == '' or filename == '': michael@0: return (file, None) michael@0: michael@0: fileInfo = None michael@0: root = '' michael@0: if file in vcsFileInfoCache: michael@0: # Already cached this info, use it. 
def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    return {'Windows': Dumper_Win32,
            'Microsoft': Dumper_Win32,
            'Linux': Dumper_Linux,
            'Sunos5': Dumper_Solaris,
            'Darwin': Dumper_Mac}[platform.system()](**kwargs)

def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    pdbStreamFile = open(outputPath, "w")
    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
    pdbStreamFile.write(vcs_root)
    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
    # can't do string interpolation because the source server also uses this
    # and so there are % in the above
    pdbStreamFile.write(fileStream)
    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    pdbStreamFile.close()
    return result

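# Illustrative only (paths hypothetical): each line of fileStream has the form
#   c:\build\src\browser\foo.cpp*browser/foo.cpp*abcdef012345
# (built in ProcessFilesWork below), so in the data block above %var2% is the
# repo-relative path and %var3% the revision, making the extraction target
#   %hgserver%/raw-file/abcdef012345/browser/foo.cpp
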
def WorkerInitializer(cls, lock, srcdirRepoInfo):
    """Windows worker processes won't have run GlobalInit, and due to a lack
    of fork(), won't inherit the class variables from the parent. They only
    need a few variables, so we run an initializer to set them. Redundant but
    harmless on other platforms."""
    cls.lock = lock
    cls.srcdirRepoInfo = srcdirRepoInfo

def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)

class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server. Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir. Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

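    # A minimal driving sketch (paths hypothetical), mirroring what main()
    # below does with the protocol described in the class docstring:
    #
    #   Dumper.GlobalInit()
    #   dumper = GetPlatformSpecificDumper(dump_syms='dump_syms',
    #                                      symbol_path='/tmp/syms')
    #   dumper.Process('obj-dir')   # or individual files
    #   dumper.Finish()             # wait for the worker pool to drain
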
    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        num_cpus = module.cpu_count()
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.srcdirRepoInfo = Dumper.manager.dict()
        cls.pool = module.Pool(num_cpus, WorkerInitializer,
                               (cls, cls.lock, cls.srcdirRepoInfo))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        def ensure_slash(u):
            if not u.endswith("/"):
                return u + "/"
            return u
        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch")))
                        for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the default remote.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                print "Skipping project %s" % proj.toxml()
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

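    # Illustrative only (names and revision hypothetical): a minimal manifest
    # this method accepts looks like
    #   <manifest>
    #     <remote name="origin" fetch="git://git.example.com/"/>
    #     <default remote="origin"/>
    #     <project name="repo.git" path="src/repo" revision="abcdef012345"/>
    #   </manifest>
    # which registers <manifest dir>/src/repo as a git srcdir rooted at
    # http://git.example.com/repo.git at that revision.
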
    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude)
                       for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing
        is performed asynchronously, and Finish must be called to wait for it
        to complete and clean up."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory. Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it to complete
        and clean up."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

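    # For example (patterns hypothetical): a dumper constructed with
    # exclude=['*test*'] makes ShouldProcess above reject any matching
    # basename, and removing entries from the dirs list in place stops
    # os.walk from descending into directories that ShouldSkipDir flags
    # (e.g. the .dSYM bundles skipped by Dumper_Mac below).
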
    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of
        submitted jobs."""
        self.JobStarted(file_key)
        res = Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is
        performed asynchronously, and Finish must be called to wait for it
        to complete and clean up.
        All files after the first are fallbacks in case the first file does
        not process successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            # record that we submitted jobs for this tuple of files
            self.files_record[files] = 0
            self.SubmitJob(files[-1], StartProcessFilesWork,
                           args=(self, files, arch_num, arch, vcs_root,
                                 after, after_arg),
                           callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that
        # function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg,
                   'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the
            # first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = proc.stdout.next()
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # sets vcs_root in case the loop through files
                                # were to end on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                            # we want to return true only if at least one line
                            # is not a MODULE or FILE line
                            result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of
                    # what was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result

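# Illustrative only (guid and paths hypothetical): dump_syms emits records like
#   MODULE windows x86 ABCDEF1234567890ABCDEF1234567890A firefox.pdb
#   FILE 0 c:/builds/src/browser/app/nsBrowserApp.cpp
# ProcessFilesWork above rewrites FILE paths into their VCS form when -v is
# given, and stores the result in the Breakpad layout
#   firefox.pdb/ABCDEF1234567890ABCDEF1234567890A/firefox.sym
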
# Platform-specific subclasses. For the most part, these just have
# logic to determine what files to extract symbols from.

class Dumper_Win32(Dumper):
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase. If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:", "w"),
                                  stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for
        # source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename),
                             "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result

class Dumper_Linux(Dumper):
    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files
            os.system("gzip %s" % full_path)
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)

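# The net effect of the Linux CopyDebug above (paths illustrative): next to
#   libxul.so/<guid>/libxul.so.sym
# the store gains libxul.so/<guid>/libxul.so.dbg.gz, while the stripped binary
# keeps a .gnu_debuglink section naming libxul.so.dbg so the debugger can
# reunite the two later.
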
class Dumper_Solaris(Dumper):
    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            output = os.popen("file " + file).read()
            return output.split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWorkMac(file)

def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    shutil.rmtree(dsymbundle)

class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]),
                       callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac,
                                after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the
        # record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null", "w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file, )
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

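    # For example (arch list hypothetical): constructing the dumper with
    # archs='i386 x86_64' makes the call above run
    #   dsymutil --arch=i386 --arch=x86_64 <file>
    # and the resulting <file>.dSYM bundle is what the per-arch dump_syms
    # jobs then consume, with the original file kept as a fallback.
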
    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null", "w"),
                                  stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)

# Entry point if called as a standalone program
def main():
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files or dirs>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.""")
    (options, args) = parser.parse_args()

    # check to see if pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        if not pdbstr or not os.path.exists(pdbstr):
            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
            sys.exit(1)

    if len(args) < 3:
        parser.error("not enough arguments")
        exit(1)

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()

# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()