1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/tools/symbolstore.py Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,917 @@ 1.4 +#!/bin/env python 1.5 +# This Source Code Form is subject to the terms of the Mozilla Public 1.6 +# License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 1.8 +# 1.9 +# Usage: symbolstore.py <params> <dump_syms path> <symbol store path> 1.10 +# <debug info files or dirs> 1.11 +# Runs dump_syms on each debug info file specified on the command line, 1.12 +# then places the resulting symbol file in the proper directory 1.13 +# structure in the symbol store path. Accepts multiple files 1.14 +# on the command line, so can be called as part of a pipe using 1.15 +# find <dir> | xargs symbolstore.pl <dump_syms> <storepath> 1.16 +# But really, you might just want to pass it <dir>. 1.17 +# 1.18 +# Parameters accepted: 1.19 +# -c : Copy debug info files to the same directory structure 1.20 +# as sym files 1.21 +# -a "<archs>" : Run dump_syms -a <arch> for each space separated 1.22 +# cpu architecture in <archs> (only on OS X) 1.23 +# -s <srcdir> : Use <srcdir> as the top source directory to 1.24 +# generate relative filenames. 1.25 + 1.26 +import sys 1.27 +import platform 1.28 +import os 1.29 +import re 1.30 +import shutil 1.31 +import textwrap 1.32 +import fnmatch 1.33 +import subprocess 1.34 +import urlparse 1.35 +import multiprocessing 1.36 +import collections 1.37 +from optparse import OptionParser 1.38 +from xml.dom.minidom import parse 1.39 + 1.40 +# Utility classes 1.41 + 1.42 +class VCSFileInfo: 1.43 + """ A base class for version-controlled file information. Ensures that the 1.44 + following attributes are generated only once (successfully): 1.45 + 1.46 + self.root 1.47 + self.clean_root 1.48 + self.revision 1.49 + self.filename 1.50 + 1.51 + The attributes are generated by a single call to the GetRoot, 1.52 + GetRevision, and GetFilename methods. Those methods are explicitly not 1.53 + implemented here and must be implemented in derived classes. """ 1.54 + 1.55 + def __init__(self, file): 1.56 + if not file: 1.57 + raise ValueError 1.58 + self.file = file 1.59 + 1.60 + def __getattr__(self, name): 1.61 + """ __getattr__ is only called for attributes that are not set on self, 1.62 + so setting self.[attr] will prevent future calls to the GetRoot, 1.63 + GetRevision, and GetFilename methods. We don't set the values on 1.64 + failure on the off chance that a future call might succeed. """ 1.65 + 1.66 + if name == "root": 1.67 + root = self.GetRoot() 1.68 + if root: 1.69 + self.root = root 1.70 + return root 1.71 + 1.72 + elif name == "clean_root": 1.73 + clean_root = self.GetCleanRoot() 1.74 + if clean_root: 1.75 + self.clean_root = clean_root 1.76 + return clean_root 1.77 + 1.78 + elif name == "revision": 1.79 + revision = self.GetRevision() 1.80 + if revision: 1.81 + self.revision = revision 1.82 + return revision 1.83 + 1.84 + elif name == "filename": 1.85 + filename = self.GetFilename() 1.86 + if filename: 1.87 + self.filename = filename 1.88 + return filename 1.89 + 1.90 + raise AttributeError 1.91 + 1.92 + def GetRoot(self): 1.93 + """ This method should return the unmodified root for the file or 'None' 1.94 + on failure. """ 1.95 + raise NotImplementedError 1.96 + 1.97 + def GetCleanRoot(self): 1.98 + """ This method should return the repository root for the file or 'None' 1.99 + on failure. """ 1.100 + raise NotImplementedErrors 1.101 + 1.102 + def GetRevision(self): 1.103 + """ This method should return the revision number for the file or 'None' 1.104 + on failure. """ 1.105 + raise NotImplementedError 1.106 + 1.107 + def GetFilename(self): 1.108 + """ This method should return the repository-specific filename for the 1.109 + file or 'None' on failure. """ 1.110 + raise NotImplementedError 1.111 + 1.112 + 1.113 +# This regex separates protocol and optional username/password from a url. 1.114 +# For instance, all the following urls will be transformed into 1.115 +# 'foo.com/bar': 1.116 +# 1.117 +# http://foo.com/bar 1.118 +# svn+ssh://user@foo.com/bar 1.119 +# svn+ssh://user:pass@foo.com/bar 1.120 +# 1.121 +rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$') 1.122 + 1.123 +def read_output(*args): 1.124 + (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate() 1.125 + return stdout.rstrip() 1.126 + 1.127 +class HGRepoInfo: 1.128 + def __init__(self, path): 1.129 + self.path = path 1.130 + rev = read_output('hg', '-R', path, 1.131 + 'parent', '--template={node|short}') 1.132 + # Look for the default hg path. If SRVSRV_ROOT is set, we 1.133 + # don't bother asking hg. 1.134 + hg_root = os.environ.get("SRCSRV_ROOT") 1.135 + if hg_root: 1.136 + root = hg_root 1.137 + else: 1.138 + root = read_output('hg', '-R', path, 1.139 + 'showconfig', 'paths.default') 1.140 + if not root: 1.141 + print >> sys.stderr, "Failed to get HG Repo for %s" % path 1.142 + cleanroot = None 1.143 + if root: 1.144 + match = rootRegex.match(root) 1.145 + if match: 1.146 + cleanroot = match.group(1) 1.147 + if cleanroot.endswith('/'): 1.148 + cleanroot = cleanroot[:-1] 1.149 + if cleanroot is None: 1.150 + print >> sys.stderr, textwrap.dedent("""\ 1.151 + Could not determine repo info for %s. This is either not a clone of the web-based 1.152 + repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path 1.153 + sys.exit(1) 1.154 + self.rev = rev 1.155 + self.root = root 1.156 + self.cleanroot = cleanroot 1.157 + 1.158 + def GetFileInfo(self, file): 1.159 + return HGFileInfo(file, self) 1.160 + 1.161 +class HGFileInfo(VCSFileInfo): 1.162 + def __init__(self, file, repo): 1.163 + VCSFileInfo.__init__(self, file) 1.164 + self.repo = repo 1.165 + self.file = os.path.relpath(file, repo.path) 1.166 + 1.167 + def GetRoot(self): 1.168 + return self.repo.root 1.169 + 1.170 + def GetCleanRoot(self): 1.171 + return self.repo.cleanroot 1.172 + 1.173 + def GetRevision(self): 1.174 + return self.repo.rev 1.175 + 1.176 + def GetFilename(self): 1.177 + if self.revision and self.clean_root: 1.178 + return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision) 1.179 + return self.file 1.180 + 1.181 +class GitRepoInfo: 1.182 + """ 1.183 + Info about a local git repository. Does not currently 1.184 + support discovering info about a git clone, the info must be 1.185 + provided out-of-band. 1.186 + """ 1.187 + def __init__(self, path, rev, root): 1.188 + self.path = path 1.189 + cleanroot = None 1.190 + if root: 1.191 + match = rootRegex.match(root) 1.192 + if match: 1.193 + cleanroot = match.group(1) 1.194 + if cleanroot.endswith('/'): 1.195 + cleanroot = cleanroot[:-1] 1.196 + if cleanroot is None: 1.197 + print >> sys.stderr, textwrap.dedent("""\ 1.198 + Could not determine repo info for %s (%s). This is either not a clone of a web-based 1.199 + repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root) 1.200 + sys.exit(1) 1.201 + self.rev = rev 1.202 + self.cleanroot = cleanroot 1.203 + 1.204 + def GetFileInfo(self, file): 1.205 + return GitFileInfo(file, self) 1.206 + 1.207 +class GitFileInfo(VCSFileInfo): 1.208 + def __init__(self, file, repo): 1.209 + VCSFileInfo.__init__(self, file) 1.210 + self.repo = repo 1.211 + self.file = os.path.relpath(file, repo.path) 1.212 + 1.213 + def GetRoot(self): 1.214 + return self.repo.path 1.215 + 1.216 + def GetCleanRoot(self): 1.217 + return self.repo.cleanroot 1.218 + 1.219 + def GetRevision(self): 1.220 + return self.repo.rev 1.221 + 1.222 + def GetFilename(self): 1.223 + if self.revision and self.clean_root: 1.224 + return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision) 1.225 + return self.file 1.226 + 1.227 +# Utility functions 1.228 + 1.229 +# A cache of files for which VCS info has already been determined. Used to 1.230 +# prevent extra filesystem activity or process launching. 1.231 +vcsFileInfoCache = {} 1.232 + 1.233 +def IsInDir(file, dir): 1.234 + # the lower() is to handle win32+vc8, where 1.235 + # the source filenames come out all lowercase, 1.236 + # but the srcdir can be mixed case 1.237 + return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower()) 1.238 + 1.239 +def GetVCSFilenameFromSrcdir(file, srcdir): 1.240 + if srcdir not in Dumper.srcdirRepoInfo: 1.241 + # Not in cache, so find it adnd cache it 1.242 + if os.path.isdir(os.path.join(srcdir, '.hg')): 1.243 + Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir) 1.244 + else: 1.245 + # Unknown VCS or file is not in a repo. 1.246 + return None 1.247 + return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file) 1.248 + 1.249 +def GetVCSFilename(file, srcdirs): 1.250 + """Given a full path to a file, and the top source directory, 1.251 + look for version control information about this file, and return 1.252 + a tuple containing 1.253 + 1) a specially formatted filename that contains the VCS type, 1.254 + VCS location, relative filename, and revision number, formatted like: 1.255 + vcs:vcs location:filename:revision 1.256 + For example: 1.257 + cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36 1.258 + 2) the unmodified root information if it exists""" 1.259 + (path, filename) = os.path.split(file) 1.260 + if path == '' or filename == '': 1.261 + return (file, None) 1.262 + 1.263 + fileInfo = None 1.264 + root = '' 1.265 + if file in vcsFileInfoCache: 1.266 + # Already cached this info, use it. 1.267 + fileInfo = vcsFileInfoCache[file] 1.268 + else: 1.269 + for srcdir in srcdirs: 1.270 + if not IsInDir(file, srcdir): 1.271 + continue 1.272 + fileInfo = GetVCSFilenameFromSrcdir(file, srcdir) 1.273 + if fileInfo: 1.274 + vcsFileInfoCache[file] = fileInfo 1.275 + break 1.276 + 1.277 + if fileInfo: 1.278 + file = fileInfo.filename 1.279 + root = fileInfo.root 1.280 + 1.281 + # we want forward slashes on win32 paths 1.282 + return (file.replace("\\", "/"), root) 1.283 + 1.284 +def GetPlatformSpecificDumper(**kwargs): 1.285 + """This function simply returns a instance of a subclass of Dumper 1.286 + that is appropriate for the current platform.""" 1.287 + # Python 2.5 has a bug where platform.system() returns 'Microsoft'. 1.288 + # Remove this when we no longer support Python 2.5. 1.289 + return {'Windows': Dumper_Win32, 1.290 + 'Microsoft': Dumper_Win32, 1.291 + 'Linux': Dumper_Linux, 1.292 + 'Sunos5': Dumper_Solaris, 1.293 + 'Darwin': Dumper_Mac}[platform.system()](**kwargs) 1.294 + 1.295 +def SourceIndex(fileStream, outputPath, vcs_root): 1.296 + """Takes a list of files, writes info to a data block in a .stream file""" 1.297 + # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing 1.298 + # Create the srcsrv data block that indexes the pdb file 1.299 + result = True 1.300 + pdbStreamFile = open(outputPath, "w") 1.301 + pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''') 1.302 + pdbStreamFile.write(vcs_root) 1.303 + pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''') 1.304 + pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above 1.305 + pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n") 1.306 + pdbStreamFile.close() 1.307 + return result 1.308 + 1.309 +def WorkerInitializer(cls, lock, srcdirRepoInfo): 1.310 + """Windows worker processes won't have run GlobalInit, and due to a lack of fork(), 1.311 + won't inherit the class variables from the parent. They only need a few variables, 1.312 + so we run an initializer to set them. Redundant but harmless on other platforms.""" 1.313 + cls.lock = lock 1.314 + cls.srcdirRepoInfo = srcdirRepoInfo 1.315 + 1.316 +def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg): 1.317 + """multiprocessing can't handle methods as Process targets, so we define 1.318 + a simple wrapper function around the work method.""" 1.319 + return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg) 1.320 + 1.321 +class Dumper: 1.322 + """This class can dump symbols from a file with debug info, and 1.323 + store the output in a directory structure that is valid for use as 1.324 + a Breakpad symbol server. Requires a path to a dump_syms binary-- 1.325 + |dump_syms| and a directory to store symbols in--|symbol_path|. 1.326 + Optionally takes a list of processor architectures to process from 1.327 + each debug file--|archs|, the full path to the top source 1.328 + directory--|srcdir|, for generating relative source file names, 1.329 + and an option to copy debug info files alongside the dumped 1.330 + symbol files--|copy_debug|, mostly useful for creating a 1.331 + Microsoft Symbol Server from the resulting output. 1.332 + 1.333 + You don't want to use this directly if you intend to call 1.334 + ProcessDir. Instead, call GetPlatformSpecificDumper to 1.335 + get an instance of a subclass. 1.336 + 1.337 + Processing is performed asynchronously via worker processes; in 1.338 + order to wait for processing to finish and cleanup correctly, you 1.339 + must call Finish after all Process/ProcessDir calls have been made. 1.340 + You must also call Dumper.GlobalInit before creating or using any 1.341 + instances.""" 1.342 + def __init__(self, dump_syms, symbol_path, 1.343 + archs=None, 1.344 + srcdirs=[], 1.345 + copy_debug=False, 1.346 + vcsinfo=False, 1.347 + srcsrv=False, 1.348 + exclude=[], 1.349 + repo_manifest=None): 1.350 + # popen likes absolute paths, at least on windows 1.351 + self.dump_syms = os.path.abspath(dump_syms) 1.352 + self.symbol_path = symbol_path 1.353 + if archs is None: 1.354 + # makes the loop logic simpler 1.355 + self.archs = [''] 1.356 + else: 1.357 + self.archs = ['-a %s' % a for a in archs.split()] 1.358 + self.srcdirs = [os.path.normpath(a) for a in srcdirs] 1.359 + self.copy_debug = copy_debug 1.360 + self.vcsinfo = vcsinfo 1.361 + self.srcsrv = srcsrv 1.362 + self.exclude = exclude[:] 1.363 + if repo_manifest: 1.364 + self.parse_repo_manifest(repo_manifest) 1.365 + 1.366 + # book-keeping to keep track of our jobs and the cleanup work per file tuple 1.367 + self.files_record = {} 1.368 + self.jobs_record = collections.defaultdict(int) 1.369 + 1.370 + @classmethod 1.371 + def GlobalInit(cls, module=multiprocessing): 1.372 + """Initialize the class globals for the multiprocessing setup; must 1.373 + be called before any Dumper instances are created and used. Test cases 1.374 + may pass in a different module to supply Manager and Pool objects, 1.375 + usually multiprocessing.dummy.""" 1.376 + num_cpus = module.cpu_count() 1.377 + if num_cpus is None: 1.378 + # assume a dual core machine if we can't find out for some reason 1.379 + # probably better on single core anyway due to I/O constraints 1.380 + num_cpus = 2 1.381 + 1.382 + # have to create any locks etc before the pool 1.383 + cls.manager = module.Manager() 1.384 + cls.jobs_condition = Dumper.manager.Condition() 1.385 + cls.lock = Dumper.manager.RLock() 1.386 + cls.srcdirRepoInfo = Dumper.manager.dict() 1.387 + cls.pool = module.Pool(num_cpus, WorkerInitializer, 1.388 + (cls, cls.lock, cls.srcdirRepoInfo)) 1.389 + 1.390 + def JobStarted(self, file_key): 1.391 + """Increments the number of submitted jobs for the specified key file, 1.392 + defined as the original file we processed; note that a single key file 1.393 + can generate up to 1 + len(self.archs) jobs in the Mac case.""" 1.394 + with Dumper.jobs_condition: 1.395 + self.jobs_record[file_key] += 1 1.396 + Dumper.jobs_condition.notify_all() 1.397 + 1.398 + def JobFinished(self, file_key): 1.399 + """Decrements the number of submitted jobs for the specified key file, 1.400 + defined as the original file we processed; once the count is back to 0, 1.401 + remove the entry from our record.""" 1.402 + with Dumper.jobs_condition: 1.403 + self.jobs_record[file_key] -= 1 1.404 + 1.405 + if self.jobs_record[file_key] == 0: 1.406 + del self.jobs_record[file_key] 1.407 + 1.408 + Dumper.jobs_condition.notify_all() 1.409 + 1.410 + def output(self, dest, output_str): 1.411 + """Writes |output_str| to |dest|, holding |lock|; 1.412 + terminates with a newline.""" 1.413 + with Dumper.lock: 1.414 + dest.write(output_str + "\n") 1.415 + dest.flush() 1.416 + 1.417 + def output_pid(self, dest, output_str): 1.418 + """Debugging output; prepends the pid to the string.""" 1.419 + self.output(dest, "%d: %s" % (os.getpid(), output_str)) 1.420 + 1.421 + def parse_repo_manifest(self, repo_manifest): 1.422 + """ 1.423 + Parse an XML manifest of repository info as produced 1.424 + by the `repo manifest -r` command. 1.425 + """ 1.426 + doc = parse(repo_manifest) 1.427 + if doc.firstChild.tagName != "manifest": 1.428 + return 1.429 + # First, get remotes. 1.430 + def ensure_slash(u): 1.431 + if not u.endswith("/"): 1.432 + return u + "/" 1.433 + return u 1.434 + remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")]) 1.435 + # And default remote. 1.436 + default_remote = None 1.437 + if doc.getElementsByTagName("default"): 1.438 + default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote") 1.439 + # Now get projects. Assume they're relative to repo_manifest. 1.440 + base_dir = os.path.abspath(os.path.dirname(repo_manifest)) 1.441 + for proj in doc.getElementsByTagName("project"): 1.442 + # name is the repository URL relative to the remote path. 1.443 + name = proj.getAttribute("name") 1.444 + # path is the path on-disk, relative to the manifest file. 1.445 + path = proj.getAttribute("path") 1.446 + # revision is the changeset ID. 1.447 + rev = proj.getAttribute("revision") 1.448 + # remote is the base URL to use. 1.449 + remote = proj.getAttribute("remote") 1.450 + # remote defaults to the <default remote>. 1.451 + if not remote: 1.452 + remote = default_remote 1.453 + # path defaults to name. 1.454 + if not path: 1.455 + path = name 1.456 + if not (name and path and rev and remote): 1.457 + print "Skipping project %s" % proj.toxml() 1.458 + continue 1.459 + remote = remotes[remote] 1.460 + # Turn git URLs into http URLs so that urljoin works. 1.461 + if remote.startswith("git:"): 1.462 + remote = "http" + remote[3:] 1.463 + # Add this project to srcdirs. 1.464 + srcdir = os.path.join(base_dir, path) 1.465 + self.srcdirs.append(srcdir) 1.466 + # And cache its VCS file info. Currently all repos mentioned 1.467 + # in a repo manifest are assumed to be git. 1.468 + root = urlparse.urljoin(remote, name) 1.469 + Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root) 1.470 + 1.471 + # subclasses override this 1.472 + def ShouldProcess(self, file): 1.473 + return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude) 1.474 + 1.475 + # and can override this 1.476 + def ShouldSkipDir(self, dir): 1.477 + return False 1.478 + 1.479 + def RunFileCommand(self, file): 1.480 + """Utility function, returns the output of file(1)""" 1.481 + try: 1.482 + # we use -L to read the targets of symlinks, 1.483 + # and -b to print just the content, not the filename 1.484 + return os.popen("file -Lb " + file).read() 1.485 + except: 1.486 + return "" 1.487 + 1.488 + # This is a no-op except on Win32 1.489 + def FixFilenameCase(self, file): 1.490 + return file 1.491 + 1.492 + # This is a no-op except on Win32 1.493 + def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root): 1.494 + return "" 1.495 + 1.496 + # subclasses override this if they want to support this 1.497 + def CopyDebug(self, file, debug_file, guid): 1.498 + pass 1.499 + 1.500 + def Finish(self, stop_pool=True): 1.501 + """Wait for the expected number of jobs to be submitted, and then 1.502 + wait for the pool to finish processing them. By default, will close 1.503 + and clear the pool, but for testcases that need multiple runs, pass 1.504 + stop_pool = False.""" 1.505 + with Dumper.jobs_condition: 1.506 + while len(self.jobs_record) != 0: 1.507 + Dumper.jobs_condition.wait() 1.508 + if stop_pool: 1.509 + Dumper.pool.close() 1.510 + Dumper.pool.join() 1.511 + 1.512 + def Process(self, file_or_dir): 1.513 + """Process a file or all the (valid) files in a directory; processing is performed 1.514 + asynchronously, and Finish must be called to wait for it complete and cleanup.""" 1.515 + if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir): 1.516 + self.ProcessDir(file_or_dir) 1.517 + elif os.path.isfile(file_or_dir): 1.518 + self.ProcessFiles((file_or_dir,)) 1.519 + 1.520 + def ProcessDir(self, dir): 1.521 + """Process all the valid files in this directory. Valid files 1.522 + are determined by calling ShouldProcess; processing is performed 1.523 + asynchronously, and Finish must be called to wait for it complete and cleanup.""" 1.524 + for root, dirs, files in os.walk(dir): 1.525 + for d in dirs[:]: 1.526 + if self.ShouldSkipDir(d): 1.527 + dirs.remove(d) 1.528 + for f in files: 1.529 + fullpath = os.path.join(root, f) 1.530 + if self.ShouldProcess(fullpath): 1.531 + self.ProcessFiles((fullpath,)) 1.532 + 1.533 + def SubmitJob(self, file_key, func, args, callback): 1.534 + """Submits a job to the pool of workers; increments the number of submitted jobs.""" 1.535 + self.JobStarted(file_key) 1.536 + res = Dumper.pool.apply_async(func, args=args, callback=callback) 1.537 + 1.538 + def ProcessFilesFinished(self, res): 1.539 + """Callback from multiprocesing when ProcessFilesWork finishes; 1.540 + run the cleanup work, if any""" 1.541 + self.JobFinished(res['files'][-1]) 1.542 + # only run the cleanup function once per tuple of files 1.543 + self.files_record[res['files']] += 1 1.544 + if self.files_record[res['files']] == len(self.archs): 1.545 + del self.files_record[res['files']] 1.546 + if res['after']: 1.547 + res['after'](res['status'], res['after_arg']) 1.548 + 1.549 + def ProcessFiles(self, files, after=None, after_arg=None): 1.550 + """Dump symbols from these files into a symbol file, stored 1.551 + in the proper directory structure in |symbol_path|; processing is performed 1.552 + asynchronously, and Finish must be called to wait for it complete and cleanup. 1.553 + All files after the first are fallbacks in case the first file does not process 1.554 + successfully; if it does, no other files will be touched.""" 1.555 + self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files)) 1.556 + 1.557 + # tries to get the vcs root from the .mozconfig first - if it's not set 1.558 + # the tinderbox vcs path will be assigned further down 1.559 + vcs_root = os.environ.get("SRCSRV_ROOT") 1.560 + for arch_num, arch in enumerate(self.archs): 1.561 + self.files_record[files] = 0 # record that we submitted jobs for this tuple of files 1.562 + self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished) 1.563 + 1.564 + def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg): 1.565 + self.output_pid(sys.stderr, "Worker processing files: %s" % (files,)) 1.566 + 1.567 + # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on 1.568 + result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files } 1.569 + 1.570 + sourceFileStream = '' 1.571 + for file in files: 1.572 + # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully 1.573 + try: 1.574 + proc = subprocess.Popen([self.dump_syms] + arch.split() + [file], 1.575 + stdout=subprocess.PIPE) 1.576 + module_line = proc.stdout.next() 1.577 + if module_line.startswith("MODULE"): 1.578 + # MODULE os cpu guid debug_file 1.579 + (guid, debug_file) = (module_line.split())[3:5] 1.580 + # strip off .pdb extensions, and append .sym 1.581 + sym_file = re.sub("\.pdb$", "", debug_file) + ".sym" 1.582 + # we do want forward slashes here 1.583 + rel_path = os.path.join(debug_file, 1.584 + guid, 1.585 + sym_file).replace("\\", "/") 1.586 + full_path = os.path.normpath(os.path.join(self.symbol_path, 1.587 + rel_path)) 1.588 + try: 1.589 + os.makedirs(os.path.dirname(full_path)) 1.590 + except OSError: # already exists 1.591 + pass 1.592 + f = open(full_path, "w") 1.593 + f.write(module_line) 1.594 + # now process the rest of the output 1.595 + for line in proc.stdout: 1.596 + if line.startswith("FILE"): 1.597 + # FILE index filename 1.598 + (x, index, filename) = line.rstrip().split(None, 2) 1.599 + if sys.platform == "sunos5": 1.600 + for srcdir in self.srcdirs: 1.601 + start = filename.find(self.srcdir) 1.602 + if start != -1: 1.603 + filename = filename[start:] 1.604 + break 1.605 + filename = self.FixFilenameCase(filename) 1.606 + sourcepath = filename 1.607 + if self.vcsinfo: 1.608 + (filename, rootname) = GetVCSFilename(filename, self.srcdirs) 1.609 + # sets vcs_root in case the loop through files were to end on an empty rootname 1.610 + if vcs_root is None: 1.611 + if rootname: 1.612 + vcs_root = rootname 1.613 + # gather up files with hg for indexing 1.614 + if filename.startswith("hg"): 1.615 + (ver, checkout, source_file, revision) = filename.split(":", 3) 1.616 + sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n" 1.617 + f.write("FILE %s %s\n" % (index, filename)) 1.618 + else: 1.619 + # pass through all other lines unchanged 1.620 + f.write(line) 1.621 + # we want to return true only if at least one line is not a MODULE or FILE line 1.622 + result['status'] = True 1.623 + f.close() 1.624 + proc.wait() 1.625 + # we output relative paths so callers can get a list of what 1.626 + # was generated 1.627 + self.output(sys.stdout, rel_path) 1.628 + if self.srcsrv and vcs_root: 1.629 + # add source server indexing to the pdb file 1.630 + self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root) 1.631 + # only copy debug the first time if we have multiple architectures 1.632 + if self.copy_debug and arch_num == 0: 1.633 + self.CopyDebug(file, debug_file, guid) 1.634 + except StopIteration: 1.635 + pass 1.636 + except e: 1.637 + self.output(sys.stderr, "Unexpected error: %s" % (str(e),)) 1.638 + raise 1.639 + if result['status']: 1.640 + # we only need 1 file to work 1.641 + break 1.642 + return result 1.643 + 1.644 +# Platform-specific subclasses. For the most part, these just have 1.645 +# logic to determine what files to extract symbols from. 1.646 + 1.647 +class Dumper_Win32(Dumper): 1.648 + fixedFilenameCaseCache = {} 1.649 + 1.650 + def ShouldProcess(self, file): 1.651 + """This function will allow processing of pdb files that have dll 1.652 + or exe files with the same base name next to them.""" 1.653 + if not Dumper.ShouldProcess(self, file): 1.654 + return False 1.655 + if file.endswith(".pdb"): 1.656 + (path,ext) = os.path.splitext(file) 1.657 + if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"): 1.658 + return True 1.659 + return False 1.660 + 1.661 + def FixFilenameCase(self, file): 1.662 + """Recent versions of Visual C++ put filenames into 1.663 + PDB files as all lowercase. If the file exists 1.664 + on the local filesystem, fix it.""" 1.665 + 1.666 + # Use a cached version if we have one. 1.667 + if file in self.fixedFilenameCaseCache: 1.668 + return self.fixedFilenameCaseCache[file] 1.669 + 1.670 + result = file 1.671 + 1.672 + (path, filename) = os.path.split(file) 1.673 + if os.path.isdir(path): 1.674 + lc_filename = filename.lower() 1.675 + for f in os.listdir(path): 1.676 + if f.lower() == lc_filename: 1.677 + result = os.path.join(path, f) 1.678 + break 1.679 + 1.680 + # Cache the corrected version to avoid future filesystem hits. 1.681 + self.fixedFilenameCaseCache[file] = result 1.682 + return result 1.683 + 1.684 + def CopyDebug(self, file, debug_file, guid): 1.685 + rel_path = os.path.join(debug_file, 1.686 + guid, 1.687 + debug_file).replace("\\", "/") 1.688 + full_path = os.path.normpath(os.path.join(self.symbol_path, 1.689 + rel_path)) 1.690 + shutil.copyfile(file, full_path) 1.691 + # try compressing it 1.692 + compressed_file = os.path.splitext(full_path)[0] + ".pd_" 1.693 + # ignore makecab's output 1.694 + success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D", 1.695 + "CompressionMemory=21", 1.696 + full_path, compressed_file], 1.697 + stdout=open("NUL:","w"), stderr=subprocess.STDOUT) 1.698 + if success == 0 and os.path.exists(compressed_file): 1.699 + os.unlink(full_path) 1.700 + self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_") 1.701 + else: 1.702 + self.output(sys.stdout, rel_path) 1.703 + 1.704 + def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root): 1.705 + # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing 1.706 + debug_file = os.path.abspath(debug_file) 1.707 + streamFilename = debug_file + ".stream" 1.708 + stream_output_path = os.path.abspath(streamFilename) 1.709 + # Call SourceIndex to create the .stream file 1.710 + result = SourceIndex(sourceFileStream, stream_output_path, vcs_root) 1.711 + if self.copy_debug: 1.712 + pdbstr_path = os.environ.get("PDBSTR_PATH") 1.713 + pdbstr = os.path.normpath(pdbstr_path) 1.714 + subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file), 1.715 + "-i:" + os.path.basename(streamFilename), "-s:srcsrv"], 1.716 + cwd=os.path.dirname(stream_output_path)) 1.717 + # clean up all the .stream files when done 1.718 + os.remove(stream_output_path) 1.719 + return result 1.720 + 1.721 +class Dumper_Linux(Dumper): 1.722 + objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy' 1.723 + def ShouldProcess(self, file): 1.724 + """This function will allow processing of files that are 1.725 + executable, or end with the .so extension, and additionally 1.726 + file(1) reports as being ELF files. It expects to find the file 1.727 + command in PATH.""" 1.728 + if not Dumper.ShouldProcess(self, file): 1.729 + return False 1.730 + if file.endswith(".so") or os.access(file, os.X_OK): 1.731 + return self.RunFileCommand(file).startswith("ELF") 1.732 + return False 1.733 + 1.734 + def CopyDebug(self, file, debug_file, guid): 1.735 + # We want to strip out the debug info, and add a 1.736 + # .gnu_debuglink section to the object, so the debugger can 1.737 + # actually load our debug info later. 1.738 + file_dbg = file + ".dbg" 1.739 + if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \ 1.740 + subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0: 1.741 + rel_path = os.path.join(debug_file, 1.742 + guid, 1.743 + debug_file + ".dbg") 1.744 + full_path = os.path.normpath(os.path.join(self.symbol_path, 1.745 + rel_path)) 1.746 + shutil.move(file_dbg, full_path) 1.747 + # gzip the shipped debug files 1.748 + os.system("gzip %s" % full_path) 1.749 + self.output(sys.stdout, rel_path + ".gz") 1.750 + else: 1.751 + if os.path.isfile(file_dbg): 1.752 + os.unlink(file_dbg) 1.753 + 1.754 +class Dumper_Solaris(Dumper): 1.755 + def RunFileCommand(self, file): 1.756 + """Utility function, returns the output of file(1)""" 1.757 + try: 1.758 + output = os.popen("file " + file).read() 1.759 + return output.split('\t')[1]; 1.760 + except: 1.761 + return "" 1.762 + 1.763 + def ShouldProcess(self, file): 1.764 + """This function will allow processing of files that are 1.765 + executable, or end with the .so extension, and additionally 1.766 + file(1) reports as being ELF files. It expects to find the file 1.767 + command in PATH.""" 1.768 + if not Dumper.ShouldProcess(self, file): 1.769 + return False 1.770 + if file.endswith(".so") or os.access(file, os.X_OK): 1.771 + return self.RunFileCommand(file).startswith("ELF") 1.772 + return False 1.773 + 1.774 +def StartProcessFilesWorkMac(dumper, file): 1.775 + """multiprocessing can't handle methods as Process targets, so we define 1.776 + a simple wrapper function around the work method.""" 1.777 + return dumper.ProcessFilesWorkMac(file) 1.778 + 1.779 +def AfterMac(status, dsymbundle): 1.780 + """Cleanup function to run on Macs after we process the file(s).""" 1.781 + # CopyDebug will already have been run from Dumper.ProcessFiles 1.782 + shutil.rmtree(dsymbundle) 1.783 + 1.784 +class Dumper_Mac(Dumper): 1.785 + def ShouldProcess(self, file): 1.786 + """This function will allow processing of files that are 1.787 + executable, or end with the .dylib extension, and additionally 1.788 + file(1) reports as being Mach-O files. It expects to find the file 1.789 + command in PATH.""" 1.790 + if not Dumper.ShouldProcess(self, file): 1.791 + return False 1.792 + if file.endswith(".dylib") or os.access(file, os.X_OK): 1.793 + return self.RunFileCommand(file).startswith("Mach-O") 1.794 + return False 1.795 + 1.796 + def ShouldSkipDir(self, dir): 1.797 + """We create .dSYM bundles on the fly, but if someone runs 1.798 + buildsymbols twice, we should skip any bundles we created 1.799 + previously, otherwise we'll recurse into them and try to 1.800 + dump the inner bits again.""" 1.801 + if dir.endswith(".dSYM"): 1.802 + return True 1.803 + return False 1.804 + 1.805 + def ProcessFiles(self, files, after=None, after_arg=None): 1.806 + # also note, files must be len 1 here, since we're the only ones 1.807 + # that ever add more than one file to the list 1.808 + self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0])) 1.809 + self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished) 1.810 + 1.811 + def ProcessFilesMacFinished(self, result): 1.812 + if result['status']: 1.813 + # kick off new jobs per-arch with our new list of files 1.814 + Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0]) 1.815 + # only decrement jobs *after* that, since otherwise we'll remove the record for this file 1.816 + self.JobFinished(result['files'][-1]) 1.817 + 1.818 + def ProcessFilesWorkMac(self, file): 1.819 + """dump_syms on Mac needs to be run on a dSYM bundle produced 1.820 + by dsymutil(1), so run dsymutil here and pass the bundle name 1.821 + down to the superclass method instead.""" 1.822 + self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,)) 1.823 + 1.824 + # our return is a status and a tuple of files to dump symbols for 1.825 + # the extra files are fallbacks; as soon as one is dumped successfully, we stop 1.826 + result = { 'status' : False, 'files' : None, 'file_key' : file } 1.827 + dsymbundle = file + ".dSYM" 1.828 + if os.path.exists(dsymbundle): 1.829 + shutil.rmtree(dsymbundle) 1.830 + # dsymutil takes --arch=foo instead of -a foo like everything else 1.831 + subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a] 1.832 + + [file], 1.833 + stdout=open("/dev/null","w")) 1.834 + if not os.path.exists(dsymbundle): 1.835 + # dsymutil won't produce a .dSYM for files without symbols 1.836 + self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,)) 1.837 + result['status'] = False 1.838 + result['files'] = (file, ) 1.839 + return result 1.840 + 1.841 + result['status'] = True 1.842 + result['files'] = (dsymbundle, file) 1.843 + return result 1.844 + 1.845 + def CopyDebug(self, file, debug_file, guid): 1.846 + """ProcessFiles has already produced a dSYM bundle, so we should just 1.847 + copy that to the destination directory. However, we'll package it 1.848 + into a .tar.bz2 because the debug symbols are pretty huge, and 1.849 + also because it's a bundle, so it's a directory. |file| here is the 1.850 + dSYM bundle, and |debug_file| is the original filename.""" 1.851 + rel_path = os.path.join(debug_file, 1.852 + guid, 1.853 + os.path.basename(file) + ".tar.bz2") 1.854 + full_path = os.path.abspath(os.path.join(self.symbol_path, 1.855 + rel_path)) 1.856 + success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)], 1.857 + cwd=os.path.dirname(file), 1.858 + stdout=open("/dev/null","w"), stderr=subprocess.STDOUT) 1.859 + if success == 0 and os.path.exists(full_path): 1.860 + self.output(sys.stdout, rel_path) 1.861 + 1.862 +# Entry point if called as a standalone program 1.863 +def main(): 1.864 + parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>") 1.865 + parser.add_option("-c", "--copy", 1.866 + action="store_true", dest="copy_debug", default=False, 1.867 + help="Copy debug info files into the same directory structure as symbol files") 1.868 + parser.add_option("-a", "--archs", 1.869 + action="store", dest="archs", 1.870 + help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)") 1.871 + parser.add_option("-s", "--srcdir", 1.872 + action="append", dest="srcdir", default=[], 1.873 + help="Use SRCDIR to determine relative paths to source files") 1.874 + parser.add_option("-v", "--vcs-info", 1.875 + action="store_true", dest="vcsinfo", 1.876 + help="Try to retrieve VCS info for each FILE listed in the output") 1.877 + parser.add_option("-i", "--source-index", 1.878 + action="store_true", dest="srcsrv", default=False, 1.879 + help="Add source index information to debug files, making them suitable for use in a source server.") 1.880 + parser.add_option("-x", "--exclude", 1.881 + action="append", dest="exclude", default=[], metavar="PATTERN", 1.882 + help="Skip processing files matching PATTERN.") 1.883 + parser.add_option("--repo-manifest", 1.884 + action="store", dest="repo_manifest", 1.885 + help="""Get source information from this XML manifest 1.886 +produced by the `repo manifest -r` command. 1.887 +""") 1.888 + (options, args) = parser.parse_args() 1.889 + 1.890 + #check to see if the pdbstr.exe exists 1.891 + if options.srcsrv: 1.892 + pdbstr = os.environ.get("PDBSTR_PATH") 1.893 + if not os.path.exists(pdbstr): 1.894 + print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n" 1.895 + sys.exit(1) 1.896 + 1.897 + if len(args) < 3: 1.898 + parser.error("not enough arguments") 1.899 + exit(1) 1.900 + 1.901 + dumper = GetPlatformSpecificDumper(dump_syms=args[0], 1.902 + symbol_path=args[1], 1.903 + copy_debug=options.copy_debug, 1.904 + archs=options.archs, 1.905 + srcdirs=options.srcdir, 1.906 + vcsinfo=options.vcsinfo, 1.907 + srcsrv=options.srcsrv, 1.908 + exclude=options.exclude, 1.909 + repo_manifest=options.repo_manifest) 1.910 + for arg in args[2:]: 1.911 + dumper.Process(arg) 1.912 + dumper.Finish() 1.913 + 1.914 +# run main if run directly 1.915 +if __name__ == "__main__": 1.916 + # set up the multiprocessing infrastructure before we start; 1.917 + # note that this needs to be in the __main__ guard, or else Windows will choke 1.918 + Dumper.GlobalInit() 1.919 + 1.920 + main()