|
1 #!/bin/env python |
|
2 # This Source Code Form is subject to the terms of the Mozilla Public |
|
3 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5 # |
|
6 # Usage: symbolstore.py <params> <dump_syms path> <symbol store path> |
|
7 # <debug info files or dirs> |
|
8 # Runs dump_syms on each debug info file specified on the command line, |
|
9 # then places the resulting symbol file in the proper directory |
|
10 # structure in the symbol store path. Accepts multiple files |
|
11 # on the command line, so can be called as part of a pipe using |
|
# find <dir> | xargs symbolstore.py <dump_syms> <storepath>
|
13 # But really, you might just want to pass it <dir>. |
|
14 # |
|
15 # Parameters accepted: |
|
16 # -c : Copy debug info files to the same directory structure |
|
17 # as sym files |
|
18 # -a "<archs>" : Run dump_syms -a <arch> for each space separated |
|
19 # cpu architecture in <archs> (only on OS X) |
|
20 # -s <srcdir> : Use <srcdir> as the top source directory to |
|
21 # generate relative filenames. |
|
22 |
|
23 import sys |
|
24 import platform |
|
25 import os |
|
26 import re |
|
27 import shutil |
|
28 import textwrap |
|
29 import fnmatch |
|
30 import subprocess |
|
31 import urlparse |
|
32 import multiprocessing |
|
33 import collections |
|
34 from optparse import OptionParser |
|
35 from xml.dom.minidom import parse |
|
36 |
|
37 # Utility classes |
|
38 |
|
class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
    following attributes are generated only once (successfully):

        self.root
        self.clean_root
        self.revision
        self.filename

    The attributes are generated by a single call to the GetRoot,
    GetRevision, and GetFilename methods. Those methods are explicitly not
    implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        # A falsy file path can never produce useful VCS info.
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
        so setting self.[attr] will prevent future calls to the GetRoot,
        GetRevision, and GetFilename methods. We don't set the values on
        failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
        on failure. """
        # Fixed: this previously raised a NameError ("NotImplementedErrors")
        # instead of the intended NotImplementedError.
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
        file or 'None' on failure. """
        raise NotImplementedError
|
108 |
|
109 |
|
# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
# (Used to compute the "clean root" stored by the repo-info classes below.)
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
|
119 |
|
def read_output(*args):
    """Run the command given by |args| and return its stdout, with any
    trailing whitespace stripped."""
    process = subprocess.Popen(args=args, stdout=subprocess.PIPE)
    output, _ = process.communicate()
    return output.rstrip()
|
123 |
|
class HGRepoInfo:
    # Holds info about a local Mercurial checkout: its path, the short
    # changeset id of the working parent (|rev|), the repo URL (|root|)
    # and the protocol/credential-stripped URL (|cleanroot|).
    def __init__(self, path):
        self.path = path
        # Short changeset id of the working directory's parent.
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path. If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            # Strip the protocol and any credentials off the root URL,
            # e.g. http://user@foo.com/bar -> foo.com/bar (see rootRegex).
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            # Without a usable root we cannot produce source-server info;
            # this is fatal for the whole run.
            print >> sys.stderr, textwrap.dedent("""\
            Could not determine repo info for %s. This is either not a clone of the web-based
            repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        # Wrap |file| in an HGFileInfo tied to this repository.
        return HGFileInfo(file, self)
|
157 |
|
class HGFileInfo(VCSFileInfo):
    """VCS file information backed by a local Mercurial checkout; the
    repository details come from an HGRepoInfo instance."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Keep the path relative to the repository checkout.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        """Return the unmodified repository root URL."""
        return self.repo.root

    def GetCleanRoot(self):
        """Return the root URL with protocol and credentials stripped."""
        return self.repo.cleanroot

    def GetRevision(self):
        """Return the short changeset id of the checkout's parent."""
        return self.repo.rev

    def GetFilename(self):
        """Return an "hg:<root>:<file>:<revision>" spec when both the
        revision and clean root are known; otherwise the bare relative
        path."""
        if self.revision and self.clean_root:
            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file
|
177 |
|
class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.
    """
    # |rev| is the commit id and |root| the repository URL, both supplied
    # by the caller (e.g. Dumper.parse_repo_manifest).
    def __init__(self, path, rev, root):
        self.path = path
        cleanroot = None
        if root:
            # Strip the protocol and any credentials off the root URL,
            # e.g. http://user@foo.com/bar -> foo.com/bar (see rootRegex).
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            # A repo with no usable root URL is fatal for the whole run.
            print >> sys.stderr, textwrap.dedent("""\
            Could not determine repo info for %s (%s). This is either not a clone of a web-based
            repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        # Wrap |file| in a GitFileInfo tied to this repository.
        return GitFileInfo(file, self)
|
203 |
|
class GitFileInfo(VCSFileInfo):
    """VCS file information backed by a git checkout; the repository
    details come from a GitRepoInfo instance."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Keep the path relative to the repository checkout.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        """Return the local checkout path."""
        return self.repo.path

    def GetCleanRoot(self):
        """Return the repo URL with protocol and credentials stripped."""
        return self.repo.cleanroot

    def GetRevision(self):
        """Return the commit id recorded for this repository."""
        return self.repo.rev

    def GetFilename(self):
        """Return a "git:<root>:<file>:<revision>" spec when both the
        revision and clean root are known; otherwise the bare relative
        path."""
        if self.revision and self.clean_root:
            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file
|
223 |
|
# Utility functions

# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
# Maps a source file path -> VCSFileInfo (populated by GetVCSFilename).
vcsFileInfoCache = {}
|
229 |
|
def IsInDir(file, dir):
    """Return True if |file| is |dir| or lies underneath it.

    The comparison is case-insensitive: this handles win32+vc8, where
    the source filenames come out all lowercase but the srcdir can be
    mixed case."""
    f = os.path.abspath(file).lower()
    d = os.path.abspath(dir).lower()
    # Compare on a path-component boundary so that e.g. /foo/barbaz is
    # not mistakenly treated as being inside /foo/bar (the previous
    # plain startswith() check had that bug).
    return f == d or f.startswith(d.rstrip(os.sep) + os.sep)
|
235 |
|
def GetVCSFilenameFromSrcdir(file, srcdir):
    """Look up (and cache) repository info for |srcdir|, then return the
    VCS file info for |file| within it, or None when |srcdir| is not a
    recognized repository."""
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not in the cache yet, so detect the VCS and remember the result.
        if not os.path.isdir(os.path.join(srcdir, '.hg')):
            # Unknown VCS or file is not in a repo.
            return None
        Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)
|
245 |
|
def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
       VCS location, relative filename, and revision number, formatted like:
       vcs:vcs location:filename:revision
       For example:
        cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    (head, tail) = os.path.split(file)
    if head == '' or tail == '':
        # Not a usable path; hand it back untouched.
        return (file, None)

    fileInfo = vcsFileInfoCache.get(file)
    if fileInfo is None:
        # Not cached yet: probe each source dir that contains this file.
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    root = ''
    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)
|
280 |
|
def GetPlatformSpecificDumper(**kwargs):
    """Instantiate and return the Dumper subclass matching the current
    platform, forwarding |kwargs| to its constructor."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    dumpers = {'Windows': Dumper_Win32,
               'Microsoft': Dumper_Win32,
               'Linux': Dumper_Linux,
               'Sunos5': Dumper_Solaris,
               'Darwin': Dumper_Mac}
    return dumpers[platform.system()](**kwargs)
|
291 |
|
def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    stream = open(outputPath, "w")
    try:
        stream.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
        stream.write(vcs_root)
        stream.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
        # can't do string interpolation because the source server also uses
        # this and so there are % in the above
        stream.write(fileStream)
        stream.write("SRCSRV: end ------------------------------------------------\r\n\n")
    finally:
        stream.close()
    return True
|
305 |
|
def WorkerInitializer(cls, lock, srcdirRepoInfo):
    """Windows worker processes won't have run GlobalInit, and due to a
    lack of fork() won't inherit the class variables from the parent, so
    stash the few shared objects they need directly on the class.
    Redundant but harmless on other platforms."""
    cls.srcdirRepoInfo = srcdirRepoInfo
    cls.lock = lock
|
312 |
|
def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """Trampoline for multiprocessing: Process targets can't be bound
    methods, so forward everything to the dumper's work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root,
                                   after, after_arg)
|
317 |
|
318 class Dumper: |
|
319 """This class can dump symbols from a file with debug info, and |
|
320 store the output in a directory structure that is valid for use as |
|
321 a Breakpad symbol server. Requires a path to a dump_syms binary-- |
|
322 |dump_syms| and a directory to store symbols in--|symbol_path|. |
|
323 Optionally takes a list of processor architectures to process from |
|
324 each debug file--|archs|, the full path to the top source |
|
325 directory--|srcdir|, for generating relative source file names, |
|
326 and an option to copy debug info files alongside the dumped |
|
327 symbol files--|copy_debug|, mostly useful for creating a |
|
328 Microsoft Symbol Server from the resulting output. |
|
329 |
|
330 You don't want to use this directly if you intend to call |
|
331 ProcessDir. Instead, call GetPlatformSpecificDumper to |
|
332 get an instance of a subclass. |
|
333 |
|
334 Processing is performed asynchronously via worker processes; in |
|
335 order to wait for processing to finish and cleanup correctly, you |
|
336 must call Finish after all Process/ProcessDir calls have been made. |
|
337 You must also call Dumper.GlobalInit before creating or using any |
|
338 instances.""" |
|
339 def __init__(self, dump_syms, symbol_path, |
|
340 archs=None, |
|
341 srcdirs=[], |
|
342 copy_debug=False, |
|
343 vcsinfo=False, |
|
344 srcsrv=False, |
|
345 exclude=[], |
|
346 repo_manifest=None): |
|
347 # popen likes absolute paths, at least on windows |
|
348 self.dump_syms = os.path.abspath(dump_syms) |
|
349 self.symbol_path = symbol_path |
|
350 if archs is None: |
|
351 # makes the loop logic simpler |
|
352 self.archs = [''] |
|
353 else: |
|
354 self.archs = ['-a %s' % a for a in archs.split()] |
|
355 self.srcdirs = [os.path.normpath(a) for a in srcdirs] |
|
356 self.copy_debug = copy_debug |
|
357 self.vcsinfo = vcsinfo |
|
358 self.srcsrv = srcsrv |
|
359 self.exclude = exclude[:] |
|
360 if repo_manifest: |
|
361 self.parse_repo_manifest(repo_manifest) |
|
362 |
|
363 # book-keeping to keep track of our jobs and the cleanup work per file tuple |
|
364 self.files_record = {} |
|
365 self.jobs_record = collections.defaultdict(int) |
|
366 |
|
367 @classmethod |
|
368 def GlobalInit(cls, module=multiprocessing): |
|
369 """Initialize the class globals for the multiprocessing setup; must |
|
370 be called before any Dumper instances are created and used. Test cases |
|
371 may pass in a different module to supply Manager and Pool objects, |
|
372 usually multiprocessing.dummy.""" |
|
373 num_cpus = module.cpu_count() |
|
374 if num_cpus is None: |
|
375 # assume a dual core machine if we can't find out for some reason |
|
376 # probably better on single core anyway due to I/O constraints |
|
377 num_cpus = 2 |
|
378 |
|
379 # have to create any locks etc before the pool |
|
380 cls.manager = module.Manager() |
|
381 cls.jobs_condition = Dumper.manager.Condition() |
|
382 cls.lock = Dumper.manager.RLock() |
|
383 cls.srcdirRepoInfo = Dumper.manager.dict() |
|
384 cls.pool = module.Pool(num_cpus, WorkerInitializer, |
|
385 (cls, cls.lock, cls.srcdirRepoInfo)) |
|
386 |
|
387 def JobStarted(self, file_key): |
|
388 """Increments the number of submitted jobs for the specified key file, |
|
389 defined as the original file we processed; note that a single key file |
|
390 can generate up to 1 + len(self.archs) jobs in the Mac case.""" |
|
391 with Dumper.jobs_condition: |
|
392 self.jobs_record[file_key] += 1 |
|
393 Dumper.jobs_condition.notify_all() |
|
394 |
|
395 def JobFinished(self, file_key): |
|
396 """Decrements the number of submitted jobs for the specified key file, |
|
397 defined as the original file we processed; once the count is back to 0, |
|
398 remove the entry from our record.""" |
|
399 with Dumper.jobs_condition: |
|
400 self.jobs_record[file_key] -= 1 |
|
401 |
|
402 if self.jobs_record[file_key] == 0: |
|
403 del self.jobs_record[file_key] |
|
404 |
|
405 Dumper.jobs_condition.notify_all() |
|
406 |
|
407 def output(self, dest, output_str): |
|
408 """Writes |output_str| to |dest|, holding |lock|; |
|
409 terminates with a newline.""" |
|
410 with Dumper.lock: |
|
411 dest.write(output_str + "\n") |
|
412 dest.flush() |
|
413 |
|
414 def output_pid(self, dest, output_str): |
|
415 """Debugging output; prepends the pid to the string.""" |
|
416 self.output(dest, "%d: %s" % (os.getpid(), output_str)) |
|
417 |
|
418 def parse_repo_manifest(self, repo_manifest): |
|
419 """ |
|
420 Parse an XML manifest of repository info as produced |
|
421 by the `repo manifest -r` command. |
|
422 """ |
|
423 doc = parse(repo_manifest) |
|
424 if doc.firstChild.tagName != "manifest": |
|
425 return |
|
426 # First, get remotes. |
|
427 def ensure_slash(u): |
|
428 if not u.endswith("/"): |
|
429 return u + "/" |
|
430 return u |
|
431 remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")]) |
|
432 # And default remote. |
|
433 default_remote = None |
|
434 if doc.getElementsByTagName("default"): |
|
435 default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote") |
|
436 # Now get projects. Assume they're relative to repo_manifest. |
|
437 base_dir = os.path.abspath(os.path.dirname(repo_manifest)) |
|
438 for proj in doc.getElementsByTagName("project"): |
|
439 # name is the repository URL relative to the remote path. |
|
440 name = proj.getAttribute("name") |
|
441 # path is the path on-disk, relative to the manifest file. |
|
442 path = proj.getAttribute("path") |
|
443 # revision is the changeset ID. |
|
444 rev = proj.getAttribute("revision") |
|
445 # remote is the base URL to use. |
|
446 remote = proj.getAttribute("remote") |
|
447 # remote defaults to the <default remote>. |
|
448 if not remote: |
|
449 remote = default_remote |
|
450 # path defaults to name. |
|
451 if not path: |
|
452 path = name |
|
453 if not (name and path and rev and remote): |
|
454 print "Skipping project %s" % proj.toxml() |
|
455 continue |
|
456 remote = remotes[remote] |
|
457 # Turn git URLs into http URLs so that urljoin works. |
|
458 if remote.startswith("git:"): |
|
459 remote = "http" + remote[3:] |
|
460 # Add this project to srcdirs. |
|
461 srcdir = os.path.join(base_dir, path) |
|
462 self.srcdirs.append(srcdir) |
|
463 # And cache its VCS file info. Currently all repos mentioned |
|
464 # in a repo manifest are assumed to be git. |
|
465 root = urlparse.urljoin(remote, name) |
|
466 Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root) |
|
467 |
|
468 # subclasses override this |
|
469 def ShouldProcess(self, file): |
|
470 return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude) |
|
471 |
|
472 # and can override this |
|
473 def ShouldSkipDir(self, dir): |
|
474 return False |
|
475 |
|
476 def RunFileCommand(self, file): |
|
477 """Utility function, returns the output of file(1)""" |
|
478 try: |
|
479 # we use -L to read the targets of symlinks, |
|
480 # and -b to print just the content, not the filename |
|
481 return os.popen("file -Lb " + file).read() |
|
482 except: |
|
483 return "" |
|
484 |
|
485 # This is a no-op except on Win32 |
|
486 def FixFilenameCase(self, file): |
|
487 return file |
|
488 |
|
489 # This is a no-op except on Win32 |
|
490 def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root): |
|
491 return "" |
|
492 |
|
493 # subclasses override this if they want to support this |
|
494 def CopyDebug(self, file, debug_file, guid): |
|
495 pass |
|
496 |
|
497 def Finish(self, stop_pool=True): |
|
498 """Wait for the expected number of jobs to be submitted, and then |
|
499 wait for the pool to finish processing them. By default, will close |
|
500 and clear the pool, but for testcases that need multiple runs, pass |
|
501 stop_pool = False.""" |
|
502 with Dumper.jobs_condition: |
|
503 while len(self.jobs_record) != 0: |
|
504 Dumper.jobs_condition.wait() |
|
505 if stop_pool: |
|
506 Dumper.pool.close() |
|
507 Dumper.pool.join() |
|
508 |
|
509 def Process(self, file_or_dir): |
|
510 """Process a file or all the (valid) files in a directory; processing is performed |
|
511 asynchronously, and Finish must be called to wait for it complete and cleanup.""" |
|
512 if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir): |
|
513 self.ProcessDir(file_or_dir) |
|
514 elif os.path.isfile(file_or_dir): |
|
515 self.ProcessFiles((file_or_dir,)) |
|
516 |
|
517 def ProcessDir(self, dir): |
|
518 """Process all the valid files in this directory. Valid files |
|
519 are determined by calling ShouldProcess; processing is performed |
|
520 asynchronously, and Finish must be called to wait for it complete and cleanup.""" |
|
521 for root, dirs, files in os.walk(dir): |
|
522 for d in dirs[:]: |
|
523 if self.ShouldSkipDir(d): |
|
524 dirs.remove(d) |
|
525 for f in files: |
|
526 fullpath = os.path.join(root, f) |
|
527 if self.ShouldProcess(fullpath): |
|
528 self.ProcessFiles((fullpath,)) |
|
529 |
|
530 def SubmitJob(self, file_key, func, args, callback): |
|
531 """Submits a job to the pool of workers; increments the number of submitted jobs.""" |
|
532 self.JobStarted(file_key) |
|
533 res = Dumper.pool.apply_async(func, args=args, callback=callback) |
|
534 |
|
535 def ProcessFilesFinished(self, res): |
|
536 """Callback from multiprocesing when ProcessFilesWork finishes; |
|
537 run the cleanup work, if any""" |
|
538 self.JobFinished(res['files'][-1]) |
|
539 # only run the cleanup function once per tuple of files |
|
540 self.files_record[res['files']] += 1 |
|
541 if self.files_record[res['files']] == len(self.archs): |
|
542 del self.files_record[res['files']] |
|
543 if res['after']: |
|
544 res['after'](res['status'], res['after_arg']) |
|
545 |
|
546 def ProcessFiles(self, files, after=None, after_arg=None): |
|
547 """Dump symbols from these files into a symbol file, stored |
|
548 in the proper directory structure in |symbol_path|; processing is performed |
|
549 asynchronously, and Finish must be called to wait for it complete and cleanup. |
|
550 All files after the first are fallbacks in case the first file does not process |
|
551 successfully; if it does, no other files will be touched.""" |
|
552 self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files)) |
|
553 |
|
554 # tries to get the vcs root from the .mozconfig first - if it's not set |
|
555 # the tinderbox vcs path will be assigned further down |
|
556 vcs_root = os.environ.get("SRCSRV_ROOT") |
|
557 for arch_num, arch in enumerate(self.archs): |
|
558 self.files_record[files] = 0 # record that we submitted jobs for this tuple of files |
|
559 self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished) |
|
560 |
|
561 def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg): |
|
562 self.output_pid(sys.stderr, "Worker processing files: %s" % (files,)) |
|
563 |
|
564 # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on |
|
565 result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files } |
|
566 |
|
567 sourceFileStream = '' |
|
568 for file in files: |
|
569 # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully |
|
570 try: |
|
571 proc = subprocess.Popen([self.dump_syms] + arch.split() + [file], |
|
572 stdout=subprocess.PIPE) |
|
573 module_line = proc.stdout.next() |
|
574 if module_line.startswith("MODULE"): |
|
575 # MODULE os cpu guid debug_file |
|
576 (guid, debug_file) = (module_line.split())[3:5] |
|
577 # strip off .pdb extensions, and append .sym |
|
578 sym_file = re.sub("\.pdb$", "", debug_file) + ".sym" |
|
579 # we do want forward slashes here |
|
580 rel_path = os.path.join(debug_file, |
|
581 guid, |
|
582 sym_file).replace("\\", "/") |
|
583 full_path = os.path.normpath(os.path.join(self.symbol_path, |
|
584 rel_path)) |
|
585 try: |
|
586 os.makedirs(os.path.dirname(full_path)) |
|
587 except OSError: # already exists |
|
588 pass |
|
589 f = open(full_path, "w") |
|
590 f.write(module_line) |
|
591 # now process the rest of the output |
|
592 for line in proc.stdout: |
|
593 if line.startswith("FILE"): |
|
594 # FILE index filename |
|
595 (x, index, filename) = line.rstrip().split(None, 2) |
|
596 if sys.platform == "sunos5": |
|
597 for srcdir in self.srcdirs: |
|
598 start = filename.find(self.srcdir) |
|
599 if start != -1: |
|
600 filename = filename[start:] |
|
601 break |
|
602 filename = self.FixFilenameCase(filename) |
|
603 sourcepath = filename |
|
604 if self.vcsinfo: |
|
605 (filename, rootname) = GetVCSFilename(filename, self.srcdirs) |
|
606 # sets vcs_root in case the loop through files were to end on an empty rootname |
|
607 if vcs_root is None: |
|
608 if rootname: |
|
609 vcs_root = rootname |
|
610 # gather up files with hg for indexing |
|
611 if filename.startswith("hg"): |
|
612 (ver, checkout, source_file, revision) = filename.split(":", 3) |
|
613 sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n" |
|
614 f.write("FILE %s %s\n" % (index, filename)) |
|
615 else: |
|
616 # pass through all other lines unchanged |
|
617 f.write(line) |
|
618 # we want to return true only if at least one line is not a MODULE or FILE line |
|
619 result['status'] = True |
|
620 f.close() |
|
621 proc.wait() |
|
622 # we output relative paths so callers can get a list of what |
|
623 # was generated |
|
624 self.output(sys.stdout, rel_path) |
|
625 if self.srcsrv and vcs_root: |
|
626 # add source server indexing to the pdb file |
|
627 self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root) |
|
628 # only copy debug the first time if we have multiple architectures |
|
629 if self.copy_debug and arch_num == 0: |
|
630 self.CopyDebug(file, debug_file, guid) |
|
631 except StopIteration: |
|
632 pass |
|
633 except e: |
|
634 self.output(sys.stderr, "Unexpected error: %s" % (str(e),)) |
|
635 raise |
|
636 if result['status']: |
|
637 # we only need 1 file to work |
|
638 break |
|
639 return result |
|
640 |
|
641 # Platform-specific subclasses. For the most part, these just have |
|
642 # logic to determine what files to extract symbols from. |
|
643 |
|
class Dumper_Win32(Dumper):
    """Dumper subclass for Windows: processes .pdb files that sit next to
    a matching .exe or .dll, and knows how to copy/compress debug files
    and write source-server streams into them."""

    # Class-level cache shared by all instances: maps a filename as it
    # came out of the PDB (usually all lowercase) to its case on disk.
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path,ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase. If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            # Scan the directory for an entry matching case-insensitively.
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        # Place the .pdb at <symbol_path>/<debug_file>/<guid>/<debug_file>.
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:","w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            # Compression worked: ship only the compressed copy.
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            # NOTE(review): PDBSTR_PATH must be set in the environment when
            # copy_debug is enabled; os.path.normpath(None) would raise here.
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            # Embed the srcsrv stream into the pdb with pdbstr.exe.
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result
|
717 |
|
class Dumper_Linux(Dumper):
    """Dumper subclass for Linux ELF binaries."""
    # Allow overriding the objcopy binary via the environment.
    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files. Use an argument list (not
            # os.system with an interpolated string) so paths containing
            # spaces or shell metacharacters are handled safely.
            subprocess.call(["gzip", full_path])
            self.output(sys.stdout, rel_path + ".gz")
        else:
            # objcopy failed; don't leave a half-written .dbg file behind.
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)
|
750 |
|
class Dumper_Solaris(Dumper):
    """Dumper subclass for Solaris ELF binaries."""

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # file(1) output here is "<name>:\t<description>"; return the
            # description part after the tab.
            return os.popen("file " + file).read().split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        looks_binary = file.endswith(".so") or os.access(file, os.X_OK)
        return looks_binary and self.RunFileCommand(file).startswith("ELF")
|
770 |
|
def StartProcessFilesWorkMac(dumper, file):
    """Module-level trampoline for Dumper_Mac.ProcessFilesWorkMac.

    multiprocessing cannot pickle bound methods as Process targets,
    so jobs are submitted through this plain function instead."""
    result = dumper.ProcessFilesWorkMac(file)
    return result
|
775 |
|
def AfterMac(status, dsymbundle):
    """Post-processing hook run on Macs once a file has been dumped:
    delete the temporary .dSYM bundle. (CopyDebug has already run
    from Dumper.ProcessFiles, so nothing else remains to ship.)"""
    shutil.rmtree(dsymbundle)
|
780 |
|
class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # note: files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        """Callback invoked when the dsymutil pre-processing job
        submitted by ProcessFiles completes."""
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        # (use a with-block so the devnull handle is closed; the previous
        # bare open("/dev/null","w") leaked a file descriptor per call)
        with open(os.devnull, "w") as devnull:
            subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                            + [file],
                            stdout=devnull)
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file, )
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        # as above, close the devnull handle instead of leaking it
        with open(os.devnull, "w") as devnull:
            success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                      cwd=os.path.dirname(file),
                                      stdout=devnull, stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)
|
858 |
|
# Entry point if called as a standalone program
def main():
    """Parse command-line options, construct the platform-appropriate
    Dumper, and process every debug-info file or directory listed on
    the command line."""
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check to see if the pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        # pdbstr is None when PDBSTR_PATH is unset, and os.path.exists(None)
        # raises TypeError -- test for the unset case explicitly.
        if not pdbstr or not os.path.exists(pdbstr):
            # write() instead of `print >>`: same bytes, works on any Python
            sys.stderr.write("Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n\n")
            sys.exit(1)

    if len(args) < 3:
        # parser.error() prints the message and exits with status 2;
        # nothing runs after it (the old trailing exit(1) was unreachable).
        parser.error("not enough arguments")

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()
|
910 |
|
# run main if run directly (not when imported as a module)
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    # (Windows re-imports this module in each worker process, and doing the
    # pool setup at import time would recurse)
    Dumper.GlobalInit()

    main()