#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
#                       <debug info files or dirs>
# Runs dump_syms on each debug info file specified on the command line,
# then places the resulting symbol file in the proper directory
# structure in the symbol store path. Accepts multiple files
# on the command line, so it can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
# But really, you might just want to pass it <dir>.
#
# Parameters accepted:
#   -c           : Copy debug info files to the same directory structure
#                  as sym files
#   -a "<archs>" : Run dump_syms -a <arch> for each space-separated
#                  cpu architecture in <archs> (only on OS X)
#   -s <srcdir>  : Use <srcdir> as the top source directory to
#                  generate relative filenames.
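#
# Example invocation (paths are illustrative):
#   python symbolstore.py -c -s /path/to/srcdir ./dump_syms /path/to/symbols dist/bin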

import sys
import platform
import os
import re
import shutil
import textwrap
import fnmatch
import subprocess
import urlparse
import multiprocessing
import collections
from optparse import OptionParser
from xml.dom.minidom import parse

# Utility classes

class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        The attributes are generated by a single call to the GetRoot,
        GetRevision, and GetFilename methods. Those methods are explicitly not
        implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so setting self.[attr] will prevent future calls to the GetRoot,
            GetRevision, and GetFilename methods. We don't set the values on
            failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError


# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
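# For example (illustrative):
#   rootRegex.match('svn+ssh://user:pass@foo.com/bar').group(1) == 'foo.com/bar'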

def read_output(*args):
    (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate()
    return stdout.rstrip()

class HGRepoInfo:
    def __init__(self, path):
        self.path = path
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path. If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s. This is either not a clone of the web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return HGFileInfo(file, self)

class HGFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.root

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file

class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.
    """
    def __init__(self, path, rev, root):
        self.path = path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s (%s). This is either not a clone of a web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return GitFileInfo(file, self)

class GitFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file

# Utility functions

# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
vcsFileInfoCache = {}

def IsInDir(file, dir):
    # the lower() is to handle win32+vc8, where
    # the source filenames come out all lowercase,
    # but the srcdir can be mixed case
    return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())

def GetVCSFilenameFromSrcdir(file, srcdir):
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, '.hg')):
            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)

def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    (path, filename) = os.path.split(file)
    if path == '' or filename == '':
        return (file, None)

    fileInfo = None
    root = ''
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
        fileInfo = vcsFileInfoCache[file]
    else:
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)

def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    return {'Windows': Dumper_Win32,
            'Microsoft': Dumper_Win32,
            'Linux': Dumper_Linux,
            'SunOS': Dumper_Solaris,
            'Darwin': Dumper_Mac}[platform.system()](**kwargs)

def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    pdbStreamFile = open(outputPath, "w")
    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
    pdbStreamFile.write(vcs_root)
    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
    pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    pdbStreamFile.close()
    return result
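
# Each line in |fileStream| above has the form (values illustrative):
#   c:/build/src/foo.cpp*mozilla-central/foo.cpp*0123456789ab
# i.e. local source path, repo-relative path, and revision; the second and
# third fields feed %var2% and %var3% in HTTP_EXTRACT_TARGET.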

def WorkerInitializer(cls, lock, srcdirRepoInfo):
    """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
    won't inherit the class variables from the parent. They only need a few variables,
    so we run an initializer to set them. Redundant but harmless on other platforms."""
    cls.lock = lock
    cls.srcdirRepoInfo = srcdirRepoInfo

def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)

class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server. Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir. Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
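
    # A minimal usage sketch (mirroring main() below; values illustrative):
    #   Dumper.GlobalInit()
    #   dumper = GetPlatformSpecificDumper(dump_syms="./dump_syms",
    #                                      symbol_path="/path/to/symbols")
    #   dumper.Process("dist/bin")
    #   dumper.Finish()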
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        num_cpus = module.cpu_count()
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.srcdirRepoInfo = Dumper.manager.dict()
        cls.pool = module.Pool(num_cpus, WorkerInitializer,
                               (cls, cls.lock, cls.srcdirRepoInfo))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        def ensure_slash(u):
            if not u.endswith("/"):
                return u + "/"
            return u
        remotes = dict([(r.getAttribute("name"), ensure_slash(r.getAttribute("fetch"))) for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                print "Skipping project %s" % proj.toxml()
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory. Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of submitted jobs."""
        self.JobStarted(file_key)
        res = Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up.
        All files after the first are fallbacks in case the first file does not process
        successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            self.files_record[files] = 0 # record that we submitted jobs for this tuple of files
            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = proc.stdout.next()
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # sets vcs_root in case the loop through files ends on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                    # mark this file as having been dumped successfully
                    result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result
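
    # For reference, each dumped symbol file lands under
    #   <symbol_path>/<debug_file>/<GUID>/<debug_file minus .pdb>.sym
    # e.g. firefox.pdb/0123456789ABCDEF0123456789ABCDEF0/firefox.sym
    # (the GUID shown is illustrative).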

# Platform-specific subclasses. For the most part, these just have
# logic to determine what files to extract symbols from.

class Dumper_Win32(Dumper):
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase. If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:","w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result

class Dumper_Linux(Dumper):
    objcopy = os.environ.get('OBJCOPY', 'objcopy')

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files
            os.system("gzip %s" % full_path)
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)

class Dumper_Solaris(Dumper):
    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            output = os.popen("file " + file).read()
            return output.split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWorkMac(file)

def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    shutil.rmtree(dsymbundle)

class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files. It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null","w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file,)
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null","w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)

# Entry point if called as a standalone program
def main():
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space-separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check to see if pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        if not pdbstr or not os.path.exists(pdbstr):
            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
            sys.exit(1)

    if len(args) < 3:
        parser.error("not enough arguments")
        exit(1)

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()

# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()