toolkit/crashreporter/tools/symbolstore.py

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rwxr-xr-x

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 #!/bin/env python
michael@0 2 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 5 #
michael@0 6 # Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
michael@0 7 # <debug info files or dirs>
michael@0 8 # Runs dump_syms on each debug info file specified on the command line,
michael@0 9 # then places the resulting symbol file in the proper directory
michael@0 10 # structure in the symbol store path. Accepts multiple files
michael@0 11 # on the command line, so can be called as part of a pipe using
michael@0 12 # find <dir> | xargs symbolstore.pl <dump_syms> <storepath>
michael@0 13 # But really, you might just want to pass it <dir>.
michael@0 14 #
michael@0 15 # Parameters accepted:
michael@0 16 # -c : Copy debug info files to the same directory structure
michael@0 17 # as sym files
michael@0 18 # -a "<archs>" : Run dump_syms -a <arch> for each space separated
michael@0 19 # cpu architecture in <archs> (only on OS X)
michael@0 20 # -s <srcdir> : Use <srcdir> as the top source directory to
michael@0 21 # generate relative filenames.
michael@0 22
michael@0 23 import sys
michael@0 24 import platform
michael@0 25 import os
michael@0 26 import re
michael@0 27 import shutil
michael@0 28 import textwrap
michael@0 29 import fnmatch
michael@0 30 import subprocess
michael@0 31 import urlparse
michael@0 32 import multiprocessing
michael@0 33 import collections
michael@0 34 from optparse import OptionParser
michael@0 35 from xml.dom.minidom import parse
michael@0 36
michael@0 37 # Utility classes
michael@0 38
class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
    following attributes are generated only once (successfully):

        self.root
        self.clean_root
        self.revision
        self.filename

    The attributes are generated by a single call to the GetRoot,
    GetRevision, and GetFilename methods. Those methods are explicitly not
    implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
        so setting self.[attr] will prevent future calls to the GetRoot,
        GetRevision, and GetFilename methods. We don't set the values on
        failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
        on failure. """
        # Bug fix: previously raised the misspelled 'NotImplementedErrors',
        # which produced a NameError instead of the intended exception.
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
        file or 'None' on failure. """
        raise NotImplementedError
michael@0 108
michael@0 109
# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
# Group 1 captures everything after the '<scheme>:/+' prefix and the
# optional 'user[:pass]@' credentials block.
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
michael@0 119
def read_output(*args):
    """Run the command given by |args| and return its stdout, with any
    trailing whitespace (including the final newline) stripped."""
    proc = subprocess.Popen(args=args, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate()
    return stdout.rstrip()
michael@0 123
class HGRepoInfo:
    """Info about a local Mercurial checkout: its current short revision
    and the canonical (clean) repository root. Computed once per source
    directory, then shared by every HGFileInfo created via GetFileInfo."""

    def __init__(self, path):
        self.path = path
        # Short changeset hash of the working directory's parent revision.
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path. If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            # Strip the protocol and any user:pass@ credentials, plus at
            # most one trailing slash, to get a bare 'host/path' root.
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            # Without a clean root we cannot build source-server paths;
            # treat this as a fatal configuration error.
            print >> sys.stderr, textwrap.dedent("""\
            Could not determine repo info for %s. This is either not a clone of the web-based
            repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        # Factory: per-file info objects share this repo's metadata.
        return HGFileInfo(file, self)
michael@0 157
class HGFileInfo(VCSFileInfo):
    """Per-file VCS information backed by a shared HGRepoInfo instance."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Store the path relative to the repository checkout root.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.root

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        """Return 'hg:<clean root>:<relative path>:<revision>' when both
        the revision and clean root are known; otherwise the bare path."""
        if not (self.revision and self.clean_root):
            return self.file
        return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
michael@0 177
class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.
    """
    def __init__(self, path, rev, root):
        self.path = path
        self.rev = rev
        cleanroot = None
        if root:
            # Reduce the URL to a bare 'host/path' form: drop the protocol,
            # any credentials, and at most one trailing slash.
            matched = rootRegex.match(root)
            if matched:
                cleanroot = matched.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            sys.stderr.write(textwrap.dedent("""\
            Could not determine repo info for %s (%s). This is either not a clone of a web-based
            repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root) + "\n")
            sys.exit(1)
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Build a GitFileInfo sharing this repository's metadata."""
        return GitFileInfo(file, self)
michael@0 203
class GitFileInfo(VCSFileInfo):
    """Per-file VCS information backed by a shared GitRepoInfo instance."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Store the path relative to the repository checkout root.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        # The on-disk checkout path serves as the raw root for git repos.
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        """Return 'git:<clean root>:<relative path>:<revision>' when both
        the revision and clean root are known; otherwise the bare path."""
        if not (self.revision and self.clean_root):
            return self.file
        return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
michael@0 223
michael@0 224 # Utility functions
michael@0 225
# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
# Maps a full file path to the VCSFileInfo instance computed for it.
vcsFileInfoCache = {}
michael@0 229
def IsInDir(file, dir):
    """Return True if |file| lies inside the directory |dir|.

    The comparison is case-insensitive: the lower() is to handle win32+vc8,
    where the source filenames come out all lowercase, but the srcdir can
    be mixed case.
    """
    f = os.path.abspath(file).lower()
    d = os.path.abspath(dir).lower()
    # Require a path-separator boundary so that e.g. '/src/foobar' is not
    # treated as being inside '/src/foo' (a plain startswith would match).
    return f == d or f.startswith(d.rstrip(os.sep) + os.sep)
michael@0 235
def GetVCSFilenameFromSrcdir(file, srcdir):
    """Return per-file VCS info for |file| under |srcdir|, or None.

    Repo info is cached per srcdir in Dumper.srcdirRepoInfo so the
    (potentially expensive) repository probing runs only once.
    """
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not cached yet; probe the directory to see which VCS owns it.
        if not os.path.isdir(os.path.join(srcdir, '.hg')):
            # Unknown VCS or file is not in a repo.
            return None
        Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)
michael@0 245
def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    path, filename = os.path.split(file)
    if not path or not filename:
        return (file, None)

    root = ''
    fileInfo = vcsFileInfoCache.get(file)
    if fileInfo is None:
        # Not cached; try each source dir that contains this file.
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)
michael@0 280
def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns a instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    dumper_classes = {
        'Windows': Dumper_Win32,
        'Microsoft': Dumper_Win32,
        'Linux': Dumper_Linux,
        'Sunos5': Dumper_Solaris,
        'Darwin': Dumper_Mac,
    }
    return dumper_classes[platform.system()](**kwargs)
michael@0 291
def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file.

    |fileStream| is the pre-built 'source files' section (one
    'path*vcspath*revision' line per file), |outputPath| is where the
    .stream file is written, and |vcs_root| is the HG server URL.
    Returns True on completion.
    """
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    # Use a context manager so the stream file is closed even if a write fails.
    with open(outputPath, "w") as pdbStreamFile:
        pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
        pdbStreamFile.write(vcs_root)
        pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
        pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
        pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    return result
michael@0 305
def WorkerInitializer(cls, lock, srcdirRepoInfo):
    """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
    won't inherit the class variables from the parent. They only need a few variables,
    so we run an initializer to set them. Redundant but harmless on other platforms."""
    for attr, value in (("lock", lock), ("srcdirRepoInfo", srcdirRepoInfo)):
        setattr(cls, attr, value)
michael@0 312
def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so this
    module-level wrapper simply forwards to the dumper's work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root,
                                   after, after_arg)
michael@0 317
class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server. Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir. Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=None,
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=None,
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        # srcdirs/exclude default to None rather than [] to avoid the
        # shared-mutable-default-argument pitfall; normalize here.
        self.srcdirs = [os.path.normpath(a) for a in (srcdirs or [])]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = list(exclude) if exclude else []
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        num_cpus = module.cpu_count()
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.srcdirRepoInfo = Dumper.manager.dict()
        cls.pool = module.Pool(num_cpus, WorkerInitializer,
                               (cls, cls.lock, cls.srcdirRepoInfo))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        def ensure_slash(u):
            if not u.endswith("/"):
                return u + "/"
            return u
        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                # Parenthesized form works identically in Python 2 and 3.
                print("Skipping project %s" % proj.toxml())
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except Exception:
            # Best-effort: file(1) may be unavailable; callers treat ''
            # as "unknown". (Was a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory. Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of submitted jobs."""
        self.JobStarted(file_key)
        # The AsyncResult is intentionally discarded; completion is tracked
        # via |callback| and the jobs_record bookkeeping.
        Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocesing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup.
        All files after the first are fallbacks in case the first file does not process
        successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            self.files_record[files] = 0 # record that we submitted jobs for this tuple of files
            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        """Worker-side body of ProcessFiles: runs dump_syms on each candidate
        file until one succeeds and writes the resulting .sym file into the
        symbol store. Returns a result dict consumed by ProcessFilesFinished."""
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                # next() builtin instead of .next() method (works on 2.6+/3.x)
                module_line = next(proc.stdout)
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    # Bug fix: this previously read the
                                    # nonexistent attribute |self.srcdir|
                                    # (a NameError at runtime); use the
                                    # loop variable instead.
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # sets vcs_root in case the loop through files were to end on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                            # we want to return true only if at least one line is not a MODULE or FILE line
                            result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                # dump_syms produced no output at all; try the next fallback.
                pass
            except Exception as e:
                # Bug fix: was 'except e:', which raised a NameError instead
                # of catching and reporting the real exception.
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result
michael@0 640
michael@0 641 # Platform-specific subclasses. For the most part, these just have
michael@0 642 # logic to determine what files to extract symbols from.
michael@0 643
class Dumper_Win32(Dumper):
    """Dumper subclass for Windows.

    Processes .pdb files that have a matching binary (.exe or .dll)
    next to them, compresses the copied debug files with makecab, and
    supports source-server indexing via pdbstr.exe.
    """
    # Cache of already-corrected filename casings, shared across
    # instances since the answer depends only on the filesystem.
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase. If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        # Symbol-store layout is <name>/<guid>/<name>.
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output; use a context manager so the
        # null-device handle is closed (the original leaked it)
        with open(os.devnull, "w") as devnull:
            success = subprocess.call(["makecab.exe", "/D",
                                       "CompressionType=LZX", "/D",
                                       "CompressionMemory=21",
                                       full_path, compressed_file],
                                      stdout=devnull,
                                      stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            # compression worked: ship only the compressed file
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            # PDBSTR_PATH is validated in main() whenever source indexing
            # is enabled, so it is expected to be set here.
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result
michael@0 717
class Dumper_Linux(Dumper):
    """Dumper subclass for Linux.

    Processes ELF shared libraries and executables, and splits debug
    info out into separate, gzipped .dbg files linked back via
    .gnu_debuglink.
    """
    # objcopy can be overridden via the environment (e.g. cross builds).
    objcopy = os.environ.get('OBJCOPY', 'objcopy')

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files; pass the path as an argv
            # element instead of interpolating it into a shell command
            # so paths with spaces or shell metacharacters are safe
            subprocess.call(["gzip", full_path])
            self.output(sys.stdout, rel_path + ".gz")
        else:
            # objcopy failed; don't leave a partial .dbg file behind
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)
michael@0 750
class Dumper_Solaris(Dumper):
    """Dumper subclass for Solaris: processes ELF executables and
    shared libraries, identified via file(1)."""

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # file(1) prints "name:\tdescription"; keep the description
            return os.popen("file " + file).read().split('\t')[1]
        except:
            # best-effort: any failure is treated as "unknown file type"
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if not (file.endswith(".so") or os.access(file, os.X_OK)):
            return False
        return self.RunFileCommand(file).startswith("ELF")
michael@0 770
def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    work = dumper.ProcessFilesWorkMac
    return work(file)
michael@0 775
def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug has already been run from Dumper.ProcessFiles, so the
    # on-disk .dSYM bundle is no longer needed; remove the whole tree.
    remove_tree = shutil.rmtree
    remove_tree(dsymbundle)
michael@0 780
class Dumper_Mac(Dumper):
    """Dumper subclass for OS X.

    Runs dsymutil(1) to produce a .dSYM bundle first, then dumps
    symbols from the bundle (falling back to the raw binary), and
    packages the bundle as a .tar.bz2 for the symbol store.
    """

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        # discard its output via a context manager so the null-device
        # handle is closed (the original leaked it)
        with open(os.devnull, "w") as devnull:
            subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                            + [file],
                            stdout=devnull)
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file, )
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        # discard tar's output via a context manager so the null-device
        # handle is closed (the original leaked it)
        with open(os.devnull, "w") as devnull:
            success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                      cwd=os.path.dirname(file),
                                      stdout=devnull, stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)
michael@0 858
# Entry point if called as a standalone program
def main():
    """Parse command-line options, build the platform-specific Dumper,
    and dump symbols for every file/directory argument.

    Usage: %prog [options] <dump_syms binary> <symbol store path>
           <debug info files or directories>
    """
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check to see if the pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        # guard the unset case: os.path.exists(None) raises TypeError
        # instead of producing the friendly message below
        if not pdbstr or not os.path.exists(pdbstr):
            # sys.stderr.write works on both Python 2 and 3, unlike the
            # old `print >> sys.stderr`; output bytes are unchanged
            sys.stderr.write("Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n\n")
            sys.exit(1)

    if len(args) < 3:
        # parser.error() prints the message and exits (status 2), so no
        # further exit call is needed here
        parser.error("not enough arguments")

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()
michael@0 910
# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()

mercurial