toolkit/crashreporter/tools/symbolstore.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/crashreporter/tools/symbolstore.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,917 @@
     1.4 +#!/usr/bin/env python
     1.5 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.6 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.8 +#
     1.9 +# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
    1.10 +#                                <debug info files or dirs>
    1.11 +#   Runs dump_syms on each debug info file specified on the command line,
    1.12 +#   then places the resulting symbol file in the proper directory
    1.13 +#   structure in the symbol store path.  Accepts multiple files
    1.14 +#   on the command line, so can be called as part of a pipe using
     1.15 +#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
    1.16 +#   But really, you might just want to pass it <dir>.
    1.17 +#
    1.18 +#   Parameters accepted:
    1.19 +#     -c           : Copy debug info files to the same directory structure
    1.20 +#                    as sym files
    1.21 +#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
    1.22 +#                    cpu architecture in <archs> (only on OS X)
    1.23 +#     -s <srcdir>  : Use <srcdir> as the top source directory to
    1.24 +#                    generate relative filenames.
    1.25 +
    1.26 +import sys
    1.27 +import platform
    1.28 +import os
    1.29 +import re
    1.30 +import shutil
    1.31 +import textwrap
    1.32 +import fnmatch
    1.33 +import subprocess
    1.34 +import urlparse
    1.35 +import multiprocessing
    1.36 +import collections
    1.37 +from optparse import OptionParser
    1.38 +from xml.dom.minidom import parse
    1.39 +
    1.40 +# Utility classes
    1.41 +
    1.42 +class VCSFileInfo:
    1.43 +    """ A base class for version-controlled file information. Ensures that the
    1.44 +        following attributes are generated only once (successfully):
    1.45 +
    1.46 +            self.root
    1.47 +            self.clean_root
    1.48 +            self.revision
    1.49 +            self.filename
    1.50 +
    1.51 +        The attributes are generated by a single call to the GetRoot,
    1.52 +        GetRevision, and GetFilename methods. Those methods are explicitly not
    1.53 +        implemented here and must be implemented in derived classes. """
    1.54 +
    1.55 +    def __init__(self, file):
    1.56 +        if not file:
    1.57 +            raise ValueError
    1.58 +        self.file = file
    1.59 +
    1.60 +    def __getattr__(self, name):
    1.61 +        """ __getattr__ is only called for attributes that are not set on self,
    1.62 +            so setting self.[attr] will prevent future calls to the GetRoot,
    1.63 +            GetRevision, and GetFilename methods. We don't set the values on
    1.64 +            failure on the off chance that a future call might succeed. """
    1.65 +
    1.66 +        if name == "root":
    1.67 +            root = self.GetRoot()
    1.68 +            if root:
    1.69 +                self.root = root
    1.70 +            return root
    1.71 +
    1.72 +        elif name == "clean_root":
    1.73 +            clean_root = self.GetCleanRoot()
    1.74 +            if clean_root:
    1.75 +                self.clean_root = clean_root
    1.76 +            return clean_root
    1.77 +
    1.78 +        elif name == "revision":
    1.79 +            revision = self.GetRevision()
    1.80 +            if revision:
    1.81 +                self.revision = revision
    1.82 +            return revision
    1.83 +
    1.84 +        elif name == "filename":
    1.85 +            filename = self.GetFilename()
    1.86 +            if filename:
    1.87 +                self.filename = filename
    1.88 +            return filename
    1.89 +
    1.90 +        raise AttributeError
    1.91 +
    1.92 +    def GetRoot(self):
    1.93 +        """ This method should return the unmodified root for the file or 'None'
    1.94 +            on failure. """
    1.95 +        raise NotImplementedError
    1.96 +
    1.97 +    def GetCleanRoot(self):
    1.98 +        """ This method should return the repository root for the file or 'None'
    1.99 +            on failure. """
   1.100 +        raise NotImplementedErrors
   1.101 +
   1.102 +    def GetRevision(self):
   1.103 +        """ This method should return the revision number for the file or 'None'
   1.104 +            on failure. """
   1.105 +        raise NotImplementedError
   1.106 +
   1.107 +    def GetFilename(self):
   1.108 +        """ This method should return the repository-specific filename for the
   1.109 +            file or 'None' on failure. """
   1.110 +        raise NotImplementedError
   1.111 +
   1.112 +
   1.113 +# This regex separates protocol and optional username/password from a url.
   1.114 +# For instance, all the following urls will be transformed into
   1.115 +# 'foo.com/bar':
   1.116 +#
   1.117 +#   http://foo.com/bar
   1.118 +#   svn+ssh://user@foo.com/bar
   1.119 +#   svn+ssh://user:pass@foo.com/bar
   1.120 +#
   1.121 +rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
   1.122 +
   1.123 +def read_output(*args):
   1.124 +    (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate()
   1.125 +    return stdout.rstrip()
   1.126 +
class HGRepoInfo:
    """Info about a local Mercurial clone: the short revision hash of the
    working directory's parent, the repository root URL, and a 'clean'
    root with the protocol and any credentials stripped."""
    def __init__(self, path):
        self.path = path
        # Short node hash of the working directory's parent revision.
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path.  If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            # Strip the protocol and optional user:pass@ from the root URL.
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            # Without a clean root we cannot build source-server references,
            # so treat this as a fatal configuration error.
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s.  This is either not a clone of the web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        # Return an HGFileInfo for |file| backed by this repository.
        return HGFileInfo(file, self)
   1.160 +
   1.161 +class HGFileInfo(VCSFileInfo):
   1.162 +    def __init__(self, file, repo):
   1.163 +        VCSFileInfo.__init__(self, file)
   1.164 +        self.repo = repo
   1.165 +        self.file = os.path.relpath(file, repo.path)
   1.166 +
   1.167 +    def GetRoot(self):
   1.168 +        return self.repo.root
   1.169 +
   1.170 +    def GetCleanRoot(self):
   1.171 +        return self.repo.cleanroot
   1.172 +
   1.173 +    def GetRevision(self):
   1.174 +        return self.repo.rev
   1.175 +
   1.176 +    def GetFilename(self):
   1.177 +        if self.revision and self.clean_root:
   1.178 +            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
   1.179 +        return self.file
   1.180 +
class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.
    """
    def __init__(self, path, rev, root):
        # |path|: local checkout path; |rev|: changeset ID; |root|: repo URL.
        self.path = path
        cleanroot = None
        if root:
            # Strip the protocol and optional user:pass@ from the root URL.
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            # Without a clean root we cannot build source-server references,
            # so treat this as a fatal configuration error.
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s (%s).  This is either not a clone of a web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        # Return a GitFileInfo for |file| backed by this repository.
        return GitFileInfo(file, self)
   1.206 +
   1.207 +class GitFileInfo(VCSFileInfo):
   1.208 +    def __init__(self, file, repo):
   1.209 +        VCSFileInfo.__init__(self, file)
   1.210 +        self.repo = repo
   1.211 +        self.file = os.path.relpath(file, repo.path)
   1.212 +
   1.213 +    def GetRoot(self):
   1.214 +        return self.repo.path
   1.215 +
   1.216 +    def GetCleanRoot(self):
   1.217 +        return self.repo.cleanroot
   1.218 +
   1.219 +    def GetRevision(self):
   1.220 +        return self.repo.rev
   1.221 +
   1.222 +    def GetFilename(self):
   1.223 +        if self.revision and self.clean_root:
   1.224 +            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
   1.225 +        return self.file
   1.226 +
   1.227 +# Utility functions
   1.228 +
   1.229 +# A cache of files for which VCS info has already been determined. Used to
   1.230 +# prevent extra filesystem activity or process launching.
   1.231 +vcsFileInfoCache = {}
   1.232 +
   1.233 +def IsInDir(file, dir):
   1.234 +    # the lower() is to handle win32+vc8, where
   1.235 +    # the source filenames come out all lowercase,
   1.236 +    # but the srcdir can be mixed case
   1.237 +    return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())
   1.238 +
def GetVCSFilenameFromSrcdir(file, srcdir):
    """Return VCS file info for |file| under |srcdir|, or None if |srcdir|
    is not a recognized checkout.  Only Mercurial is auto-detected here;
    git repos are pre-populated into Dumper.srcdirRepoInfo by
    parse_repo_manifest.  Repo info is cached per srcdir."""
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, '.hg')):
            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)
   1.248 +
   1.249 +def GetVCSFilename(file, srcdirs):
   1.250 +    """Given a full path to a file, and the top source directory,
   1.251 +    look for version control information about this file, and return
   1.252 +    a tuple containing
   1.253 +    1) a specially formatted filename that contains the VCS type,
   1.254 +    VCS location, relative filename, and revision number, formatted like:
   1.255 +    vcs:vcs location:filename:revision
   1.256 +    For example:
   1.257 +    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
   1.258 +    2) the unmodified root information if it exists"""
   1.259 +    (path, filename) = os.path.split(file)
   1.260 +    if path == '' or filename == '':
   1.261 +        return (file, None)
   1.262 +
   1.263 +    fileInfo = None
   1.264 +    root = ''
   1.265 +    if file in vcsFileInfoCache:
   1.266 +        # Already cached this info, use it.
   1.267 +        fileInfo = vcsFileInfoCache[file]
   1.268 +    else:
   1.269 +        for srcdir in srcdirs:
   1.270 +            if not IsInDir(file, srcdir):
   1.271 +                continue
   1.272 +            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
   1.273 +            if fileInfo:
   1.274 +                vcsFileInfoCache[file] = fileInfo
   1.275 +                break
   1.276 +
   1.277 +    if fileInfo:
   1.278 +        file = fileInfo.filename
   1.279 +        root = fileInfo.root
   1.280 +
   1.281 +    # we want forward slashes on win32 paths
   1.282 +    return (file.replace("\\", "/"), root)
   1.283 +
   1.284 +def GetPlatformSpecificDumper(**kwargs):
   1.285 +    """This function simply returns a instance of a subclass of Dumper
   1.286 +    that is appropriate for the current platform."""
   1.287 +    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
   1.288 +    # Remove this when we no longer support Python 2.5.
   1.289 +    return {'Windows': Dumper_Win32,
   1.290 +            'Microsoft': Dumper_Win32,
   1.291 +            'Linux': Dumper_Linux,
   1.292 +            'Sunos5': Dumper_Solaris,
   1.293 +            'Darwin': Dumper_Mac}[platform.system()](**kwargs)
   1.294 +
   1.295 +def SourceIndex(fileStream, outputPath, vcs_root):
   1.296 +    """Takes a list of files, writes info to a data block in a .stream file"""
   1.297 +    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
   1.298 +    # Create the srcsrv data block that indexes the pdb file
   1.299 +    result = True
   1.300 +    pdbStreamFile = open(outputPath, "w")
   1.301 +    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
   1.302 +    pdbStreamFile.write(vcs_root)
   1.303 +    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
   1.304 +    pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
   1.305 +    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
   1.306 +    pdbStreamFile.close()
   1.307 +    return result
   1.308 +
   1.309 +def WorkerInitializer(cls, lock, srcdirRepoInfo):
   1.310 +    """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
   1.311 +    won't inherit the class variables from the parent. They only need a few variables,
   1.312 +    so we run an initializer to set them. Redundant but harmless on other platforms."""
   1.313 +    cls.lock = lock
   1.314 +    cls.srcdirRepoInfo = srcdirRepoInfo
   1.315 +
   1.316 +def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
   1.317 +    """multiprocessing can't handle methods as Process targets, so we define
   1.318 +    a simple wrapper function around the work method."""
   1.319 +    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)
   1.320 +
   1.321 +class Dumper:
   1.322 +    """This class can dump symbols from a file with debug info, and
   1.323 +    store the output in a directory structure that is valid for use as
   1.324 +    a Breakpad symbol server.  Requires a path to a dump_syms binary--
   1.325 +    |dump_syms| and a directory to store symbols in--|symbol_path|.
   1.326 +    Optionally takes a list of processor architectures to process from
   1.327 +    each debug file--|archs|, the full path to the top source
   1.328 +    directory--|srcdir|, for generating relative source file names,
   1.329 +    and an option to copy debug info files alongside the dumped
   1.330 +    symbol files--|copy_debug|, mostly useful for creating a
   1.331 +    Microsoft Symbol Server from the resulting output.
   1.332 +
   1.333 +    You don't want to use this directly if you intend to call
   1.334 +    ProcessDir.  Instead, call GetPlatformSpecificDumper to
   1.335 +    get an instance of a subclass.
   1.336 + 
   1.337 +    Processing is performed asynchronously via worker processes; in
   1.338 +    order to wait for processing to finish and cleanup correctly, you
   1.339 +    must call Finish after all Process/ProcessDir calls have been made.
   1.340 +    You must also call Dumper.GlobalInit before creating or using any
   1.341 +    instances."""
   1.342 +    def __init__(self, dump_syms, symbol_path,
   1.343 +                 archs=None,
   1.344 +                 srcdirs=[],
   1.345 +                 copy_debug=False,
   1.346 +                 vcsinfo=False,
   1.347 +                 srcsrv=False,
   1.348 +                 exclude=[],
   1.349 +                 repo_manifest=None):
   1.350 +        # popen likes absolute paths, at least on windows
   1.351 +        self.dump_syms = os.path.abspath(dump_syms)
   1.352 +        self.symbol_path = symbol_path
   1.353 +        if archs is None:
   1.354 +            # makes the loop logic simpler
   1.355 +            self.archs = ['']
   1.356 +        else:
   1.357 +            self.archs = ['-a %s' % a for a in archs.split()]
   1.358 +        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
   1.359 +        self.copy_debug = copy_debug
   1.360 +        self.vcsinfo = vcsinfo
   1.361 +        self.srcsrv = srcsrv
   1.362 +        self.exclude = exclude[:]
   1.363 +        if repo_manifest:
   1.364 +            self.parse_repo_manifest(repo_manifest)
   1.365 +
   1.366 +        # book-keeping to keep track of our jobs and the cleanup work per file tuple
   1.367 +        self.files_record = {}
   1.368 +        self.jobs_record = collections.defaultdict(int)
   1.369 +
   1.370 +    @classmethod
   1.371 +    def GlobalInit(cls, module=multiprocessing):
   1.372 +        """Initialize the class globals for the multiprocessing setup; must
   1.373 +        be called before any Dumper instances are created and used. Test cases
   1.374 +        may pass in a different module to supply Manager and Pool objects,
   1.375 +        usually multiprocessing.dummy."""
   1.376 +        num_cpus = module.cpu_count()
   1.377 +        if num_cpus is None:
   1.378 +            # assume a dual core machine if we can't find out for some reason
   1.379 +            # probably better on single core anyway due to I/O constraints
   1.380 +            num_cpus = 2
   1.381 +
   1.382 +        # have to create any locks etc before the pool
   1.383 +        cls.manager = module.Manager()
   1.384 +        cls.jobs_condition = Dumper.manager.Condition()
   1.385 +        cls.lock = Dumper.manager.RLock()
   1.386 +        cls.srcdirRepoInfo = Dumper.manager.dict()
   1.387 +        cls.pool = module.Pool(num_cpus, WorkerInitializer,
   1.388 +                               (cls, cls.lock, cls.srcdirRepoInfo))
   1.389 +
   1.390 +    def JobStarted(self, file_key):
   1.391 +        """Increments the number of submitted jobs for the specified key file,
   1.392 +        defined as the original file we processed; note that a single key file
   1.393 +        can generate up to 1 + len(self.archs) jobs in the Mac case."""
   1.394 +        with Dumper.jobs_condition:
   1.395 +            self.jobs_record[file_key] += 1
   1.396 +            Dumper.jobs_condition.notify_all()
   1.397 +
   1.398 +    def JobFinished(self, file_key):
   1.399 +        """Decrements the number of submitted jobs for the specified key file,
   1.400 +        defined as the original file we processed; once the count is back to 0,
   1.401 +        remove the entry from our record."""
   1.402 +        with Dumper.jobs_condition:
   1.403 +            self.jobs_record[file_key] -= 1
   1.404 +
   1.405 +            if self.jobs_record[file_key] == 0:
   1.406 +                del self.jobs_record[file_key]
   1.407 +
   1.408 +            Dumper.jobs_condition.notify_all()
   1.409 +
   1.410 +    def output(self, dest, output_str):
   1.411 +        """Writes |output_str| to |dest|, holding |lock|;
   1.412 +        terminates with a newline."""
   1.413 +        with Dumper.lock:
   1.414 +            dest.write(output_str + "\n")
   1.415 +            dest.flush()
   1.416 +
   1.417 +    def output_pid(self, dest, output_str):
   1.418 +        """Debugging output; prepends the pid to the string."""
   1.419 +        self.output(dest, "%d: %s" % (os.getpid(), output_str))
   1.420 +
   1.421 +    def parse_repo_manifest(self, repo_manifest):
   1.422 +        """
   1.423 +        Parse an XML manifest of repository info as produced
   1.424 +        by the `repo manifest -r` command.
   1.425 +        """
   1.426 +        doc = parse(repo_manifest)
   1.427 +        if doc.firstChild.tagName != "manifest":
   1.428 +            return
   1.429 +        # First, get remotes.
   1.430 +        def ensure_slash(u):
   1.431 +            if not u.endswith("/"):
   1.432 +                return u + "/"
   1.433 +            return u
   1.434 +        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")])
   1.435 +        # And default remote.
   1.436 +        default_remote = None
   1.437 +        if doc.getElementsByTagName("default"):
   1.438 +            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
   1.439 +        # Now get projects. Assume they're relative to repo_manifest.
   1.440 +        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
   1.441 +        for proj in doc.getElementsByTagName("project"):
   1.442 +            # name is the repository URL relative to the remote path.
   1.443 +            name = proj.getAttribute("name")
   1.444 +            # path is the path on-disk, relative to the manifest file.
   1.445 +            path = proj.getAttribute("path")
   1.446 +            # revision is the changeset ID.
   1.447 +            rev = proj.getAttribute("revision")
   1.448 +            # remote is the base URL to use.
   1.449 +            remote = proj.getAttribute("remote")
   1.450 +            # remote defaults to the <default remote>.
   1.451 +            if not remote:
   1.452 +                remote = default_remote
   1.453 +            # path defaults to name.
   1.454 +            if not path:
   1.455 +                path = name
   1.456 +            if not (name and path and rev and remote):
   1.457 +                print "Skipping project %s" % proj.toxml()
   1.458 +                continue
   1.459 +            remote = remotes[remote]
   1.460 +            # Turn git URLs into http URLs so that urljoin works.
   1.461 +            if remote.startswith("git:"):
   1.462 +                remote = "http" + remote[3:]
   1.463 +            # Add this project to srcdirs.
   1.464 +            srcdir = os.path.join(base_dir, path)
   1.465 +            self.srcdirs.append(srcdir)
   1.466 +            # And cache its VCS file info. Currently all repos mentioned
   1.467 +            # in a repo manifest are assumed to be git.
   1.468 +            root = urlparse.urljoin(remote, name)
   1.469 +            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
   1.470 +
   1.471 +    # subclasses override this
   1.472 +    def ShouldProcess(self, file):
   1.473 +        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)
   1.474 +
   1.475 +    # and can override this
   1.476 +    def ShouldSkipDir(self, dir):
   1.477 +        return False
   1.478 +
   1.479 +    def RunFileCommand(self, file):
   1.480 +        """Utility function, returns the output of file(1)"""
   1.481 +        try:
   1.482 +            # we use -L to read the targets of symlinks,
   1.483 +            # and -b to print just the content, not the filename
   1.484 +            return os.popen("file -Lb " + file).read()
   1.485 +        except:
   1.486 +            return ""
   1.487 +
   1.488 +    # This is a no-op except on Win32
   1.489 +    def FixFilenameCase(self, file):
   1.490 +        return file
   1.491 +
   1.492 +    # This is a no-op except on Win32
   1.493 +    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
   1.494 +        return ""
   1.495 +
   1.496 +    # subclasses override this if they want to support this
   1.497 +    def CopyDebug(self, file, debug_file, guid):
   1.498 +        pass
   1.499 +
   1.500 +    def Finish(self, stop_pool=True):
   1.501 +        """Wait for the expected number of jobs to be submitted, and then
   1.502 +        wait for the pool to finish processing them. By default, will close
   1.503 +        and clear the pool, but for testcases that need multiple runs, pass
   1.504 +        stop_pool = False."""
   1.505 +        with Dumper.jobs_condition:
   1.506 +            while len(self.jobs_record) != 0:
   1.507 +                Dumper.jobs_condition.wait()
   1.508 +        if stop_pool:
   1.509 +            Dumper.pool.close()
   1.510 +            Dumper.pool.join()
   1.511 +
   1.512 +    def Process(self, file_or_dir):
   1.513 +        """Process a file or all the (valid) files in a directory; processing is performed
   1.514 +        asynchronously, and Finish must be called to wait for it complete and cleanup."""
   1.515 +        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
   1.516 +            self.ProcessDir(file_or_dir)
   1.517 +        elif os.path.isfile(file_or_dir):
   1.518 +            self.ProcessFiles((file_or_dir,))
   1.519 +
   1.520 +    def ProcessDir(self, dir):
   1.521 +        """Process all the valid files in this directory.  Valid files
   1.522 +        are determined by calling ShouldProcess; processing is performed
   1.523 +        asynchronously, and Finish must be called to wait for it complete and cleanup."""
   1.524 +        for root, dirs, files in os.walk(dir):
   1.525 +            for d in dirs[:]:
   1.526 +                if self.ShouldSkipDir(d):
   1.527 +                    dirs.remove(d)
   1.528 +            for f in files:
   1.529 +                fullpath = os.path.join(root, f)
   1.530 +                if self.ShouldProcess(fullpath):
   1.531 +                    self.ProcessFiles((fullpath,))
   1.532 +
   1.533 +    def SubmitJob(self, file_key, func, args, callback):
   1.534 +        """Submits a job to the pool of workers; increments the number of submitted jobs."""
   1.535 +        self.JobStarted(file_key)
   1.536 +        res = Dumper.pool.apply_async(func, args=args, callback=callback)
   1.537 +
   1.538 +    def ProcessFilesFinished(self, res):
   1.539 +        """Callback from multiprocesing when ProcessFilesWork finishes;
   1.540 +        run the cleanup work, if any"""
   1.541 +        self.JobFinished(res['files'][-1])
   1.542 +        # only run the cleanup function once per tuple of files
   1.543 +        self.files_record[res['files']] += 1
   1.544 +        if self.files_record[res['files']] == len(self.archs):
   1.545 +            del self.files_record[res['files']]
   1.546 +            if res['after']:
   1.547 +                res['after'](res['status'], res['after_arg'])
   1.548 +
   1.549 +    def ProcessFiles(self, files, after=None, after_arg=None):
   1.550 +        """Dump symbols from these files into a symbol file, stored
   1.551 +        in the proper directory structure in  |symbol_path|; processing is performed
   1.552 +        asynchronously, and Finish must be called to wait for it complete and cleanup.
   1.553 +        All files after the first are fallbacks in case the first file does not process
   1.554 +        successfully; if it does, no other files will be touched."""
   1.555 +        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))
   1.556 +
   1.557 +        # tries to get the vcs root from the .mozconfig first - if it's not set
   1.558 +        # the tinderbox vcs path will be assigned further down
   1.559 +        vcs_root = os.environ.get("SRCSRV_ROOT")
   1.560 +        for arch_num, arch in enumerate(self.archs):
   1.561 +            self.files_record[files] = 0 # record that we submitted jobs for this tuple of files
   1.562 +            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)
   1.563 +
   1.564 +    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
   1.565 +        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))
   1.566 +
   1.567 +        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
   1.568 +        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }
   1.569 +
   1.570 +        sourceFileStream = ''
   1.571 +        for file in files:
   1.572 +            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
   1.573 +            try:
   1.574 +                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
   1.575 +                                        stdout=subprocess.PIPE)
   1.576 +                module_line = proc.stdout.next()
   1.577 +                if module_line.startswith("MODULE"):
   1.578 +                    # MODULE os cpu guid debug_file
   1.579 +                    (guid, debug_file) = (module_line.split())[3:5]
   1.580 +                    # strip off .pdb extensions, and append .sym
   1.581 +                    sym_file = re.sub("\.pdb$", "", debug_file) + ".sym"
   1.582 +                    # we do want forward slashes here
   1.583 +                    rel_path = os.path.join(debug_file,
   1.584 +                                            guid,
   1.585 +                                            sym_file).replace("\\", "/")
   1.586 +                    full_path = os.path.normpath(os.path.join(self.symbol_path,
   1.587 +                                                              rel_path))
   1.588 +                    try:
   1.589 +                        os.makedirs(os.path.dirname(full_path))
   1.590 +                    except OSError: # already exists
   1.591 +                        pass
   1.592 +                    f = open(full_path, "w")
   1.593 +                    f.write(module_line)
   1.594 +                    # now process the rest of the output
   1.595 +                    for line in proc.stdout:
   1.596 +                        if line.startswith("FILE"):
   1.597 +                            # FILE index filename
   1.598 +                            (x, index, filename) = line.rstrip().split(None, 2)
   1.599 +                            if sys.platform == "sunos5":
   1.600 +                                for srcdir in self.srcdirs:
   1.601 +                                    start = filename.find(self.srcdir)
   1.602 +                                    if start != -1:
   1.603 +                                        filename = filename[start:]
   1.604 +                                        break
   1.605 +                            filename = self.FixFilenameCase(filename)
   1.606 +                            sourcepath = filename
   1.607 +                            if self.vcsinfo:
   1.608 +                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
   1.609 +                                # sets vcs_root in case the loop through files were to end on an empty rootname
   1.610 +                                if vcs_root is None:
   1.611 +                                  if rootname:
   1.612 +                                     vcs_root = rootname
   1.613 +                            # gather up files with hg for indexing   
   1.614 +                            if filename.startswith("hg"):
   1.615 +                                (ver, checkout, source_file, revision) = filename.split(":", 3)
   1.616 +                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
   1.617 +                            f.write("FILE %s %s\n" % (index, filename))
   1.618 +                        else:
   1.619 +                            # pass through all other lines unchanged
   1.620 +                            f.write(line)
   1.621 +                            # we want to return true only if at least one line is not a MODULE or FILE line
   1.622 +                            result['status'] = True
   1.623 +                    f.close()
   1.624 +                    proc.wait()
   1.625 +                    # we output relative paths so callers can get a list of what
   1.626 +                    # was generated
   1.627 +                    self.output(sys.stdout, rel_path)
   1.628 +                    if self.srcsrv and vcs_root:
   1.629 +                        # add source server indexing to the pdb file
   1.630 +                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
   1.631 +                    # only copy debug the first time if we have multiple architectures
   1.632 +                    if self.copy_debug and arch_num == 0:
   1.633 +                        self.CopyDebug(file, debug_file, guid)
   1.634 +            except StopIteration:
   1.635 +                pass
   1.636 +            except e:
   1.637 +                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
   1.638 +                raise
   1.639 +            if result['status']:
   1.640 +                # we only need 1 file to work
   1.641 +                break
   1.642 +        return result
   1.643 +
   1.644 +# Platform-specific subclasses.  For the most part, these just have
   1.645 +# logic to determine what files to extract symbols from.
   1.646 +
   1.647 +class Dumper_Win32(Dumper):
   1.648 +    fixedFilenameCaseCache = {}
   1.649 +
   1.650 +    def ShouldProcess(self, file):
   1.651 +        """This function will allow processing of pdb files that have dll
   1.652 +        or exe files with the same base name next to them."""
   1.653 +        if not Dumper.ShouldProcess(self, file):
   1.654 +            return False
   1.655 +        if file.endswith(".pdb"):
   1.656 +            (path,ext) = os.path.splitext(file)
   1.657 +            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
   1.658 +                return True
   1.659 +        return False
   1.660 +
   1.661 +    def FixFilenameCase(self, file):
   1.662 +        """Recent versions of Visual C++ put filenames into
   1.663 +        PDB files as all lowercase.  If the file exists
   1.664 +        on the local filesystem, fix it."""
   1.665 +
   1.666 +        # Use a cached version if we have one.
   1.667 +        if file in self.fixedFilenameCaseCache:
   1.668 +            return self.fixedFilenameCaseCache[file]
   1.669 +
   1.670 +        result = file
   1.671 +
   1.672 +        (path, filename) = os.path.split(file)
   1.673 +        if os.path.isdir(path):
   1.674 +            lc_filename = filename.lower()
   1.675 +            for f in os.listdir(path):
   1.676 +                if f.lower() == lc_filename:
   1.677 +                    result = os.path.join(path, f)
   1.678 +                    break
   1.679 +
   1.680 +        # Cache the corrected version to avoid future filesystem hits.
   1.681 +        self.fixedFilenameCaseCache[file] = result
   1.682 +        return result
   1.683 +
   1.684 +    def CopyDebug(self, file, debug_file, guid):
   1.685 +        rel_path = os.path.join(debug_file,
   1.686 +                                guid,
   1.687 +                                debug_file).replace("\\", "/")
   1.688 +        full_path = os.path.normpath(os.path.join(self.symbol_path,
   1.689 +                                                  rel_path))
   1.690 +        shutil.copyfile(file, full_path)
   1.691 +        # try compressing it
   1.692 +        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
   1.693 +        # ignore makecab's output
   1.694 +        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
   1.695 +                                   "CompressionMemory=21",
   1.696 +                                   full_path, compressed_file],
   1.697 +                                  stdout=open("NUL:","w"), stderr=subprocess.STDOUT)
   1.698 +        if success == 0 and os.path.exists(compressed_file):
   1.699 +            os.unlink(full_path)
   1.700 +            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
   1.701 +        else:
   1.702 +            self.output(sys.stdout, rel_path)
   1.703 +        
   1.704 +    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
   1.705 +        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
   1.706 +        debug_file = os.path.abspath(debug_file)
   1.707 +        streamFilename = debug_file + ".stream"
   1.708 +        stream_output_path = os.path.abspath(streamFilename)
   1.709 +        # Call SourceIndex to create the .stream file
   1.710 +        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
   1.711 +        if self.copy_debug:
   1.712 +            pdbstr_path = os.environ.get("PDBSTR_PATH")
   1.713 +            pdbstr = os.path.normpath(pdbstr_path)
   1.714 +            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
   1.715 +                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
   1.716 +                            cwd=os.path.dirname(stream_output_path))
   1.717 +            # clean up all the .stream files when done
   1.718 +            os.remove(stream_output_path)
   1.719 +        return result
   1.720 +
   1.721 +class Dumper_Linux(Dumper):
   1.722 +    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'
   1.723 +    def ShouldProcess(self, file):
   1.724 +        """This function will allow processing of files that are
   1.725 +        executable, or end with the .so extension, and additionally
   1.726 +        file(1) reports as being ELF files.  It expects to find the file
   1.727 +        command in PATH."""
   1.728 +        if not Dumper.ShouldProcess(self, file):
   1.729 +            return False
   1.730 +        if file.endswith(".so") or os.access(file, os.X_OK):
   1.731 +            return self.RunFileCommand(file).startswith("ELF")
   1.732 +        return False
   1.733 +
   1.734 +    def CopyDebug(self, file, debug_file, guid):
   1.735 +        # We want to strip out the debug info, and add a
   1.736 +        # .gnu_debuglink section to the object, so the debugger can
   1.737 +        # actually load our debug info later.
   1.738 +        file_dbg = file + ".dbg"
   1.739 +        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
   1.740 +           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
   1.741 +            rel_path = os.path.join(debug_file,
   1.742 +                                    guid,
   1.743 +                                    debug_file + ".dbg")
   1.744 +            full_path = os.path.normpath(os.path.join(self.symbol_path,
   1.745 +                                                      rel_path))
   1.746 +            shutil.move(file_dbg, full_path)
   1.747 +            # gzip the shipped debug files
   1.748 +            os.system("gzip %s" % full_path)
   1.749 +            self.output(sys.stdout, rel_path + ".gz")
   1.750 +        else:
   1.751 +            if os.path.isfile(file_dbg):
   1.752 +                os.unlink(file_dbg)
   1.753 +
   1.754 +class Dumper_Solaris(Dumper):
   1.755 +    def RunFileCommand(self, file):
   1.756 +        """Utility function, returns the output of file(1)"""
   1.757 +        try:
   1.758 +            output = os.popen("file " + file).read()
   1.759 +            return output.split('\t')[1];
   1.760 +        except:
   1.761 +            return ""
   1.762 +
   1.763 +    def ShouldProcess(self, file):
   1.764 +        """This function will allow processing of files that are
   1.765 +        executable, or end with the .so extension, and additionally
   1.766 +        file(1) reports as being ELF files.  It expects to find the file
   1.767 +        command in PATH."""
   1.768 +        if not Dumper.ShouldProcess(self, file):
   1.769 +            return False
   1.770 +        if file.endswith(".so") or os.access(file, os.X_OK):
   1.771 +            return self.RunFileCommand(file).startswith("ELF")
   1.772 +        return False
   1.773 +
   1.774 +def StartProcessFilesWorkMac(dumper, file):
   1.775 +    """multiprocessing can't handle methods as Process targets, so we define
   1.776 +    a simple wrapper function around the work method."""
   1.777 +    return dumper.ProcessFilesWorkMac(file)
   1.778 +
   1.779 +def AfterMac(status, dsymbundle):
   1.780 +    """Cleanup function to run on Macs after we process the file(s)."""
   1.781 +    # CopyDebug will already have been run from Dumper.ProcessFiles
   1.782 +    shutil.rmtree(dsymbundle)
   1.783 +
class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to 
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # Submit a pre-processing job (dsymutil) for the file; the real
        # symbol dumping is kicked off from ProcessFilesMacFinished once
        # the worker reports back.
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        # Callback invoked with the dict returned by ProcessFilesWorkMac.
        if result['status']:
            # kick off new jobs per-arch with our new list of files;
            # AfterMac cleans up the .dSYM bundle when dumping is done.
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead.

        Returns a dict with 'status' (bool), 'files' (tuple of paths to
        try dumping, in order), and 'file_key' (the original input path)."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            # Remove any stale bundle so dsymutil starts fresh.
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null","w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file, )
            return result

        result['status'] = True
        # Prefer the bundle; fall back to the original binary.
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                  rel_path))
        # Run tar from the bundle's parent so the archive holds relative paths.
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null","w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)
   1.861 +
   1.862 +# Entry point if called as a standalone program
   1.863 +def main():
   1.864 +    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
   1.865 +    parser.add_option("-c", "--copy",
   1.866 +                      action="store_true", dest="copy_debug", default=False,
   1.867 +                      help="Copy debug info files into the same directory structure as symbol files")
   1.868 +    parser.add_option("-a", "--archs",
   1.869 +                      action="store", dest="archs",
   1.870 +                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
   1.871 +    parser.add_option("-s", "--srcdir",
   1.872 +                      action="append", dest="srcdir", default=[],
   1.873 +                      help="Use SRCDIR to determine relative paths to source files")
   1.874 +    parser.add_option("-v", "--vcs-info",
   1.875 +                      action="store_true", dest="vcsinfo",
   1.876 +                      help="Try to retrieve VCS info for each FILE listed in the output")
   1.877 +    parser.add_option("-i", "--source-index",
   1.878 +                      action="store_true", dest="srcsrv", default=False,
   1.879 +                      help="Add source index information to debug files, making them suitable for use in a source server.")
   1.880 +    parser.add_option("-x", "--exclude",
   1.881 +                      action="append", dest="exclude", default=[], metavar="PATTERN",
   1.882 +                      help="Skip processing files matching PATTERN.")
   1.883 +    parser.add_option("--repo-manifest",
   1.884 +                      action="store", dest="repo_manifest",
   1.885 +                      help="""Get source information from this XML manifest
   1.886 +produced by the `repo manifest -r` command.
   1.887 +""")
   1.888 +    (options, args) = parser.parse_args()
   1.889 +
   1.890 +    #check to see if the pdbstr.exe exists
   1.891 +    if options.srcsrv:
   1.892 +        pdbstr = os.environ.get("PDBSTR_PATH")
   1.893 +        if not os.path.exists(pdbstr):
   1.894 +            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
   1.895 +            sys.exit(1)
   1.896 +
   1.897 +    if len(args) < 3:
   1.898 +        parser.error("not enough arguments")
   1.899 +        exit(1)
   1.900 +
   1.901 +    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
   1.902 +                                       symbol_path=args[1],
   1.903 +                                       copy_debug=options.copy_debug,
   1.904 +                                       archs=options.archs,
   1.905 +                                       srcdirs=options.srcdir,
   1.906 +                                       vcsinfo=options.vcsinfo,
   1.907 +                                       srcsrv=options.srcsrv,
   1.908 +                                       exclude=options.exclude,
   1.909 +                                       repo_manifest=options.repo_manifest)
   1.910 +    for arg in args[2:]:
   1.911 +        dumper.Process(arg)
   1.912 +    dumper.Finish()
   1.913 +
# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    # (worker processes re-import this module, and unguarded setup would
    # re-run in every child).
    Dumper.GlobalInit()

    main()

mercurial