#!/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
#                       <debug info files or dirs>
#   Runs dump_syms on each debug info file specified on the command line,
#   then places the resulting symbol file in the proper directory
#   structure in the symbol store path. Accepts multiple files
#   on the command line, so can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
#   But really, you might just want to pass it <dir>.
#
#   Parameters accepted:
#     -c           : Copy debug info files to the same directory structure
#                    as sym files
#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
#                    cpu architecture in <archs> (only on OS X)
#     -s <srcdir>  : Use <srcdir> as the top source directory to
#                    generate relative filenames.

import sys
import platform
import os
import re
import shutil
import textwrap
import fnmatch
import subprocess
import urlparse
import multiprocessing
import collections
from optparse import OptionParser
from xml.dom.minidom import parse

# Utility classes

class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        The attributes are generated by a single call to the GetRoot,
        GetRevision, and GetFilename methods. Those methods are explicitly not
        implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so setting self.[attr] will prevent future calls to the GetRoot,
            GetRevision, and GetFilename methods. We don't set the values on
            failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError

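# A hedged illustration (not part of the original module): because of the
# __getattr__ caching above, a hypothetical subclass computes each attribute
# at most once per instance:
#
#   class StaticFileInfo(VCSFileInfo):
#       def GetRoot(self):
#           print "computing root"    # would run only on the first access
#           return "example.org/repo"
#
#   info = StaticFileInfo("foo.c")
#   info.root    # calls GetRoot(), caches the result on the instance
#   info.root    # served from the instance dict; GetRoot() is not called again
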
""" michael@0: raise NotImplementedError michael@0: michael@0: def GetCleanRoot(self): michael@0: """ This method should return the repository root for the file or 'None' michael@0: on failure. """ michael@0: raise NotImplementedErrors michael@0: michael@0: def GetRevision(self): michael@0: """ This method should return the revision number for the file or 'None' michael@0: on failure. """ michael@0: raise NotImplementedError michael@0: michael@0: def GetFilename(self): michael@0: """ This method should return the repository-specific filename for the michael@0: file or 'None' on failure. """ michael@0: raise NotImplementedError michael@0: michael@0: michael@0: # This regex separates protocol and optional username/password from a url. michael@0: # For instance, all the following urls will be transformed into michael@0: # 'foo.com/bar': michael@0: # michael@0: # http://foo.com/bar michael@0: # svn+ssh://user@foo.com/bar michael@0: # svn+ssh://user:pass@foo.com/bar michael@0: # michael@0: rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$') michael@0: michael@0: def read_output(*args): michael@0: (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate() michael@0: return stdout.rstrip() michael@0: michael@0: class HGRepoInfo: michael@0: def __init__(self, path): michael@0: self.path = path michael@0: rev = read_output('hg', '-R', path, michael@0: 'parent', '--template={node|short}') michael@0: # Look for the default hg path. If SRVSRV_ROOT is set, we michael@0: # don't bother asking hg. michael@0: hg_root = os.environ.get("SRCSRV_ROOT") michael@0: if hg_root: michael@0: root = hg_root michael@0: else: michael@0: root = read_output('hg', '-R', path, michael@0: 'showconfig', 'paths.default') michael@0: if not root: michael@0: print >> sys.stderr, "Failed to get HG Repo for %s" % path michael@0: cleanroot = None michael@0: if root: michael@0: match = rootRegex.match(root) michael@0: if match: michael@0: cleanroot = match.group(1) michael@0: if cleanroot.endswith('/'): michael@0: cleanroot = cleanroot[:-1] michael@0: if cleanroot is None: michael@0: print >> sys.stderr, textwrap.dedent("""\ michael@0: Could not determine repo info for %s. This is either not a clone of the web-based michael@0: repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path michael@0: sys.exit(1) michael@0: self.rev = rev michael@0: self.root = root michael@0: self.cleanroot = cleanroot michael@0: michael@0: def GetFileInfo(self, file): michael@0: return HGFileInfo(file, self) michael@0: michael@0: class HGFileInfo(VCSFileInfo): michael@0: def __init__(self, file, repo): michael@0: VCSFileInfo.__init__(self, file) michael@0: self.repo = repo michael@0: self.file = os.path.relpath(file, repo.path) michael@0: michael@0: def GetRoot(self): michael@0: return self.repo.root michael@0: michael@0: def GetCleanRoot(self): michael@0: return self.repo.cleanroot michael@0: michael@0: def GetRevision(self): michael@0: return self.repo.rev michael@0: michael@0: def GetFilename(self): michael@0: if self.revision and self.clean_root: michael@0: return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision) michael@0: return self.file michael@0: michael@0: class GitRepoInfo: michael@0: """ michael@0: Info about a local git repository. Does not currently michael@0: support discovering info about a git clone, the info must be michael@0: provided out-of-band. 
michael@0: """ michael@0: def __init__(self, path, rev, root): michael@0: self.path = path michael@0: cleanroot = None michael@0: if root: michael@0: match = rootRegex.match(root) michael@0: if match: michael@0: cleanroot = match.group(1) michael@0: if cleanroot.endswith('/'): michael@0: cleanroot = cleanroot[:-1] michael@0: if cleanroot is None: michael@0: print >> sys.stderr, textwrap.dedent("""\ michael@0: Could not determine repo info for %s (%s). This is either not a clone of a web-based michael@0: repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root) michael@0: sys.exit(1) michael@0: self.rev = rev michael@0: self.cleanroot = cleanroot michael@0: michael@0: def GetFileInfo(self, file): michael@0: return GitFileInfo(file, self) michael@0: michael@0: class GitFileInfo(VCSFileInfo): michael@0: def __init__(self, file, repo): michael@0: VCSFileInfo.__init__(self, file) michael@0: self.repo = repo michael@0: self.file = os.path.relpath(file, repo.path) michael@0: michael@0: def GetRoot(self): michael@0: return self.repo.path michael@0: michael@0: def GetCleanRoot(self): michael@0: return self.repo.cleanroot michael@0: michael@0: def GetRevision(self): michael@0: return self.repo.rev michael@0: michael@0: def GetFilename(self): michael@0: if self.revision and self.clean_root: michael@0: return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision) michael@0: return self.file michael@0: michael@0: # Utility functions michael@0: michael@0: # A cache of files for which VCS info has already been determined. Used to michael@0: # prevent extra filesystem activity or process launching. michael@0: vcsFileInfoCache = {} michael@0: michael@0: def IsInDir(file, dir): michael@0: # the lower() is to handle win32+vc8, where michael@0: # the source filenames come out all lowercase, michael@0: # but the srcdir can be mixed case michael@0: return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower()) michael@0: michael@0: def GetVCSFilenameFromSrcdir(file, srcdir): michael@0: if srcdir not in Dumper.srcdirRepoInfo: michael@0: # Not in cache, so find it adnd cache it michael@0: if os.path.isdir(os.path.join(srcdir, '.hg')): michael@0: Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir) michael@0: else: michael@0: # Unknown VCS or file is not in a repo. michael@0: return None michael@0: return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file) michael@0: michael@0: def GetVCSFilename(file, srcdirs): michael@0: """Given a full path to a file, and the top source directory, michael@0: look for version control information about this file, and return michael@0: a tuple containing michael@0: 1) a specially formatted filename that contains the VCS type, michael@0: VCS location, relative filename, and revision number, formatted like: michael@0: vcs:vcs location:filename:revision michael@0: For example: michael@0: cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36 michael@0: 2) the unmodified root information if it exists""" michael@0: (path, filename) = os.path.split(file) michael@0: if path == '' or filename == '': michael@0: return (file, None) michael@0: michael@0: fileInfo = None michael@0: root = '' michael@0: if file in vcsFileInfoCache: michael@0: # Already cached this info, use it. 
def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    return {'Windows': Dumper_Win32,
            'Microsoft': Dumper_Win32,
            'Linux': Dumper_Linux,
            'Sunos5': Dumper_Solaris,
            'Darwin': Dumper_Mac}[platform.system()](**kwargs)

def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    pdbStreamFile = open(outputPath, "w")
    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
    pdbStreamFile.write(vcs_root)
    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
    # can't do string interpolation because the source server also uses this
    # and so there are % in the above
    pdbStreamFile.write(fileStream)
    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    pdbStreamFile.close()
    return result

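# Illustrative only (paths hypothetical): each line of fileStream has the form
#   c:\build\src\browser\foo.cpp*browser/foo.cpp*abcdef012345
# (built in ProcessFilesWork below), so in the data block above %var2% is the
# repo-relative path and %var3% the revision, making the extraction target
#   %hgserver%/raw-file/abcdef012345/browser/foo.cpp
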
def WorkerInitializer(cls, lock, srcdirRepoInfo):
    """Windows worker processes won't have run GlobalInit, and due to a lack
    of fork(), won't inherit the class variables from the parent. They only
    need a few variables, so we run an initializer to set them. Redundant but
    harmless on other platforms."""
    cls.lock = lock
    cls.srcdirRepoInfo = srcdirRepoInfo

def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)

class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server. Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir. Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

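    # A minimal driving sketch (paths hypothetical), mirroring what main()
    # below does with the protocol described in the class docstring:
    #
    #   Dumper.GlobalInit()
    #   dumper = GetPlatformSpecificDumper(dump_syms='dump_syms',
    #                                      symbol_path='/tmp/syms')
    #   dumper.Process('obj-dir')   # or individual files
    #   dumper.Finish()             # wait for the worker pool to drain
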
    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        num_cpus = module.cpu_count()
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.srcdirRepoInfo = Dumper.manager.dict()
        cls.pool = module.Pool(num_cpus, WorkerInitializer,
                               (cls, cls.lock, cls.srcdirRepoInfo))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        def ensure_slash(u):
            if not u.endswith("/"):
                return u + "/"
            return u
        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch")))
                        for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the default remote.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                print "Skipping project %s" % proj.toxml()
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

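    # Illustrative only (names and revision hypothetical): a minimal manifest
    # this method accepts looks like
    #   <manifest>
    #     <remote name="origin" fetch="git://git.example.com/"/>
    #     <default remote="origin"/>
    #     <project name="repo.git" path="src/repo" revision="abcdef012345"/>
    #   </manifest>
    # which registers <manifest dir>/src/repo as a git srcdir rooted at
    # http://git.example.com/repo.git at that revision.
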
    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude)
                       for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing
        is performed asynchronously, and Finish must be called to wait for it
        to complete and clean up."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory. Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it to complete
        and clean up."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

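    # For example (patterns hypothetical): a dumper constructed with
    # exclude=['*test*'] makes ShouldProcess above reject any matching
    # basename, and removing entries from the dirs list in place stops
    # os.walk from descending into directories that ShouldSkipDir flags
    # (e.g. the .dSYM bundles skipped by Dumper_Mac below).
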
    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of
        submitted jobs."""
        self.JobStarted(file_key)
        res = Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is
        performed asynchronously, and Finish must be called to wait for it
        to complete and clean up.
        All files after the first are fallbacks in case the first file does
        not process successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            # record that we submitted jobs for this tuple of files
            self.files_record[files] = 0
            self.SubmitJob(files[-1], StartProcessFilesWork,
                           args=(self, files, arch_num, arch, vcs_root,
                                 after, after_arg),
                           callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that
        # function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg,
                   'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the
            # first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = proc.stdout.next()
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # sets vcs_root in case the loop through files
                                # were to end on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                            # we want to return true only if at least one line
                            # is not a MODULE or FILE line
                            result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of
                    # what was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result

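# Illustrative only (guid and paths hypothetical): dump_syms emits records like
#   MODULE windows x86 ABCDEF1234567890ABCDEF1234567890A firefox.pdb
#   FILE 0 c:/builds/src/browser/app/nsBrowserApp.cpp
# ProcessFilesWork above rewrites FILE paths into their VCS form when -v is
# given, and stores the result in the Breakpad layout
#   firefox.pdb/ABCDEF1234567890ABCDEF1234567890A/firefox.sym
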
# Platform-specific subclasses. For the most part, these just have
# logic to determine what files to extract symbols from.

class Dumper_Win32(Dumper):
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase. If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:", "w"),
                                  stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for
        # source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename),
                             "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result

class Dumper_Linux(Dumper):
    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files
            os.system("gzip %s" % full_path)
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)

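# The net effect of the Linux CopyDebug above (paths illustrative): next to
#   libxul.so/<guid>/libxul.so.sym
# the store gains libxul.so/<guid>/libxul.so.dbg.gz, while the stripped binary
# keeps a .gnu_debuglink section naming libxul.so.dbg so the debugger can
# reunite the two later.
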
class Dumper_Solaris(Dumper):
    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            output = os.popen("file " + file).read()
            return output.split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWorkMac(file)

def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    shutil.rmtree(dsymbundle)

class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]),
                       callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac,
                                after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the
        # record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null", "w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file, )
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

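    # For example (arch list hypothetical): constructing the dumper with
    # archs='i386 x86_64' makes the call above run
    #   dsymutil --arch=i386 --arch=x86_64 <file>
    # and the resulting <file>.dSYM bundle is what the per-arch dump_syms
    # jobs then consume, with the original file kept as a fallback.
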
    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null", "w"),
                                  stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)

# Entry point if called as a standalone program
def main():
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files or dirs>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.""")
    (options, args) = parser.parse_args()

    # check to see if pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        if not pdbstr or not os.path.exists(pdbstr):
            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
            sys.exit(1)

    if len(args) < 3:
        parser.error("not enough arguments")
        exit(1)

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()

# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()