config/find_OOM_errors.py

branch
TOR_BUG_3246
changeset 7
129ffea94266
equal deleted inserted replaced
-1:000000000000 0:5419cabeb113
1 #!/usr/bin/env python
2 # This Source Code Form is subject to the terms of the Mozilla Public
3 # License, v. 2.0. If a copy of the MPL was not distributed with this
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 from __future__ import print_function
6
7 usage = """%prog: A test for OOM conditions in the shell.
8
9 %prog finds segfaults and other errors caused by incorrect handling of
10 allocation during OOM (out-of-memory) conditions.
11 """
12
13 help = """Check for regressions only. This runs a set of files with a known
14 number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero
15 result if more or less errors are found. See js/src/Makefile.in for invocation.
16 """
17
18
19 import hashlib
20 import re
21 import shlex
22 import subprocess
23 import sys
24 import threading
25 import time
26
27 from optparse import OptionParser
28
29 #####################################################################
30 # Utility functions
31 #####################################################################
def run(args, stdin=None):
    """Run a command to completion and capture everything it prints.

    args  -- the command, either as one string (split with shlex) or as a
             sequence of arguments (each is converted with str()).
    stdin -- optional text fed to the child's standard input. A falsy value
             (None or "") means no pipe is created and the child inherits
             this process's stdin.

    Returns a (stdout, stderr, returncode) tuple; stdout and stderr are text.
    A KeyboardInterrupt while the child is running terminates the script with
    exit status -1.
    """
    class ThreadWorker(threading.Thread):
        """Daemon thread that drains one pipe into self.all."""

        def __init__(self, pipe):
            super(ThreadWorker, self).__init__()
            self.all = ""
            self.pipe = pipe
            self.daemon = True

        def run(self):
            # read() returns once the child closes its end of the pipe.
            data = self.pipe.read()
            self.pipe.close()
            # Python 3 pipes yield bytes; decode leniently because a crashing
            # shell may print arbitrary bytes.
            if not isinstance(data, str):
                data = data.decode("utf-8", "replace")
            self.all = data

    try:
        if isinstance(args, str):
            args = shlex.split(args)

        args = [str(a) for a in args]  # convert to strs

        stdin_pipe = subprocess.PIPE if stdin else None
        proc = subprocess.Popen(args, stdin=stdin_pipe,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Start draining stdout/stderr before feeding stdin, so a child that
        # prints a lot before reading its input cannot deadlock against us.
        stdout_worker = ThreadWorker(proc.stdout)
        stderr_worker = ThreadWorker(proc.stderr)
        stdout_worker.start()
        stderr_worker.start()

        if stdin_pipe:
            # The pipe is binary; encode text before writing (no-op on Python 2
            # byte strings).
            if not isinstance(stdin, bytes):
                stdin = stdin.encode("utf-8")
            proc.stdin.write(stdin)
            proc.stdin.close()

        proc.wait()
        stdout_worker.join()
        stderr_worker.join()

    except KeyboardInterrupt:
        sys.exit(-1)

    return (stdout_worker.all, stderr_worker.all, proc.returncode)
74
def get_js_files():
    """Return the paths of all *.js files below ../jit-test/tests.

    The list comes from the external `find` command. If `find` writes anything
    to stderr or exits non-zero, the script stops with a hint to run it from
    an objdir.
    """
    listing, complaints, status = run('find ../jit-test/tests -name "*.js"')
    if complaints != "" or status != 0:
        sys.exit("Wrong directory, run from an objdir")
    return listing.split()
80
81
82
83 #####################################################################
84 # Blacklisting
85 #####################################################################
def in_blacklist(sig):
    """Tell whether the signature `sig` has already been recorded."""
    already_seen = sig in blacklist
    return already_seen
88
def add_to_blacklist(sig):
    """Record one more occurrence of the signature `sig`.

    A signature seen for the first time starts at a count of 1.
    """
    blacklist[sig] = blacklist.get(sig, 0) + 1
92
# How often does each individual output line occur across all recorded
# signatures?
def count_lines(path="../OOM_count_log"):
    """Keep track of the amount of times individual lines occur, in order to
    prioritize the errors which occur most frequently.

    Every signature in the blacklist is split into lines, and each line is
    credited with that signature's occurrence count. The totals are written to
    `path` (overwriting it), one "count: line" entry per line, with the count
    right-aligned in six columns. Entries are sorted as strings, which puts
    the smallest counts first for counts of up to six digits.

    path -- file to write; defaults to the count log beside the objdir.
    """
    counts = {}
    for signature, occurrences in blacklist.items():
        for line in signature.split("\n"):
            counts[line] = counts.get(line, 0) + occurrences

    lines = sorted("{0:6}: {1}".format(total, line)
                   for line, total in counts.items())

    # A with-block closes the log even if the write fails.
    with open(path, "w") as countlog:
        countlog.write("\n".join(lines))
112
113
114 #####################################################################
115 # Output cleaning
116 #####################################################################
def clean_voutput(err):
    """Normalize a valgrind log so that equivalent reports compare equal.

    Run-specific details (pid prefixes, code addresses, access sizes, block
    sizes, the dsymutil notice) are removed or replaced with fixed text, and
    everything from the SIGSEGV termination notice onwards is dropped.
    Returns the cleaned text.
    """
    # (pattern, replacement) pairs, applied in this order with re.MULTILINE.
    substitutions = (
        # Skip what we can't reproduce
        (r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", ""),
        (r"^==\d+==", ""),
        (r"^\*\*\d+\*\*", ""),
        (r"^\s+by 0x[0-9A-Fa-f]+: ", "by: "),
        (r"^\s+at 0x[0-9A-Fa-f]+: ", "at: "),
        (r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2"),
        (r"(^\s+Invalid write of size )\d+", r"\1x"),
        (r"(^\s+Invalid read of size )\d+", r"\1x"),
        (r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4"),
    )
    for pattern, replacement in substitutions:
        err = re.sub(pattern, replacement, err, flags=re.MULTILINE)

    # Skip the repeating bit due to the segfault
    segfault_notice = " Process terminating with default action of signal 11 (SIGSEGV)"
    kept = err.split('\n')
    if segfault_notice in kept:
        kept = kept[:kept.index(segfault_notice)]

    return '\n'.join(kept)
138
def remove_failed_allocation_backtraces(err):
    """Strip the "site of the failed allocation" sections from cleaned
    valgrind output.

    A section is the line " The site of the failed allocation is:" plus the
    stack frames directly after it (lines starting with "at: " or "by: ", as
    produced by clean_voutput). Frames that do not follow such a header are
    kept. Returns the remaining text.
    """
    lines = []

    add = True
    for l in err.split('\n'):

        # Set start and end conditions for including text
        if l == " The site of the failed allocation is:":
            add = False
        elif not l.startswith(('by: ', 'at: ')):
            # The first non-frame line ends the section being skipped. The
            # prefix test must cover the whole marker: a two-character slice
            # can never equal a longer marker, so nothing would be skipped.
            add = True

        if add:
            lines.append(l)

    return '\n'.join(lines)
158
159
def clean_output(err):
    """Collapse the two-line malloc "pointer being freed was not allocated"
    diagnostic into that one phrase, dropping the pid and addresses.

    Text that does not contain the diagnostic is returned unchanged.
    """
    # The second line of the diagnostic reads "set a breakpoint"; with the word
    # misspelled the pattern can never match.
    pattern = (r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: "
               r"pointer being freed was not allocated\n"
               r"\*\*\* set a breakpoint in malloc_error_break to debug\n$")
    err = re.sub(pattern, "pointer being freed was not allocated", err, flags=re.MULTILINE)

    return err
164
165
166 #####################################################################
167 # Consts, etc
168 #####################################################################
169
# Shell invocation shared by every run; {0} is replaced by the test file.
command_template = (
    'shell/js'
    ' -m -j -p'
    ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"'
    ' -f ../jit-test/lib/prolog.js'
    ' -f {0}'
)


# Blacklists are things we don't want to see in our logs again (though we do
# want to count them when they happen). Whitelists we do want to see in our
# logs again, principally because the information we have isn't enough.
#
# Keys are str() of an (stdout, stderr, exitcode) tuple, hence the raw strings:
# the backslash-n is the two characters that repr() prints for a newline.

# Each known signature starts with a count of one.
blacklist = {
    r"('', '', 1)": 1,  # 1 means OOM if the shell hasn't launched yet.
    r"('', 'out of memory\n', 1)": 1,
}

whitelist = {
    r"('', 'out of memory\n', -11)",  # -11 means OOM
    r"('', 'out of memory\nout of memory\n', -11)",
}
188
189
190
191 #####################################################################
192 # Program
193 #####################################################################
194
# Options
parser = OptionParser(usage=usage)
parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help,
                  type="int", dest="regression", default=None)

(OPTIONS, args) = parser.parse_args()

# With -r the script only counts failures and compares the total against the
# expected number; without it, every new failure is diagnosed and logged.
regression_mode = OPTIONS.regression is not None

if regression_mode:
    # TODO: This should be expanded as we get a better hang of the OOM problems.
    # For now, we'll just check that the number of OOMs in one short file does not
    # increase.
    files = ["../jit-test/tests/arguments/args-createontrace.js"]
else:
    files = get_js_files()

    # Use a command-line arg to reduce the set of files
    if args:
        files = [f for f in files if args[0] in f]


# Only manual runs keep a log file; regression runs are automated for
# tinderbox and report on stdout only.
log = None if regression_mode else open("../OOM_log", "w")


num_failures = 0
for f in files:

    # Run it once to establish boundaries
    command = (command_template + ' -O').format(f)
    out, err, exit_code = run(command)
    count_match = re.match(r".*OOM max count: (\d+).*", out, flags=re.DOTALL)
    if count_match is None:
        # Without the count there is nothing to iterate over; stop with a
        # readable message instead of an AttributeError on None.
        sys.exit("No 'OOM max count' in the output of: " + command)
    max_count = int(count_match.group(1))

    # OOMs don't recover well for the first 20 allocations or so.
    # TODO: revisit this.
    for i in range(20, max_count):

        if regression_mode:
            sys.stdout.write('.')  # something short for tinderbox, no space or \n
        else:
            print("Testing allocation {0}/{1} in {2}".format(i, max_count, f))

        # Field {0} is the file name (used by the template's "-f {0}"); the
        # allocation index is the second argument and so must be field {1}.
        command = (command_template + ' -A {1}').format(f, i)
        out, err, exit_code = run(command)

        # Success (5 is SM's exit code for controlled errors)
        if exit_code == 5 and "out of memory" in err:
            continue

        # Failure
        if regression_mode:
            # Just count them
            num_failures += 1
            continue

        #########################################################################
        # The regression tests ends above. The rest of this is for running the
        # script manually.
        #########################################################################

        problem = str((out, err, exit_code))
        if in_blacklist(problem) and problem not in whitelist:
            add_to_blacklist(problem)
            continue

        add_to_blacklist(problem)

        # Get valgrind output for a good stack trace
        vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command
        run(vcommand)
        with open("OOM_valgrind_log_file") as vlog:
            vout = vlog.read()
        vout = clean_voutput(vout)
        sans_alloc_sites = remove_failed_allocation_backtraces(vout)

        # Don't print duplicate information
        if in_blacklist(sans_alloc_sites):
            add_to_blacklist(sans_alloc_sites)
            continue

        add_to_blacklist(sans_alloc_sites)

        # One line per symptom found in the shell's stderr / exit code.
        diagnosis = []
        if "out of memory\nout of memory" in err:
            diagnosis.append(" - Multiple OOMs reported\n")
        if exit_code == -11:
            diagnosis.append(" - segfault\n")
        if "out of memory" not in err:
            diagnosis.append(" - No OOM checking\n")
        if "pointer being freed was not allocated" in err:
            diagnosis.append(" - Double free\n")

        log.write("\n\n")
        log.write("=========================================================================")
        log.write("\n")
        log.write("An allocation failure at\n\tallocation {0}/{1} in {2}\n\t"
                  "causes problems (detected using bug 624094)"
                  .format(i, max_count, f))
        log.write("\n\n")

        log.write("Command (from obj directory, using patch from bug 624094):\n " + command)
        log.write("\n\n")
        log.write("stdout, stderr, exitcode:\n " + problem)
        log.write("\n\n")

        log.write("Diagnosis: \n")
        log.write("".join(diagnosis))
        log.write("\n")

        log.write("Valgrind info:\n" + vout)
        log.write("\n\n")
        log.flush()

    # Refresh the per-line frequency log after each file of a manual run.
    if not regression_mode:
        count_lines()

if log is not None:
    log.close()

print()

# Do the actual regression check
if regression_mode:
    expected_num_failures = OPTIONS.regression

    if num_failures != expected_num_failures:

        print("TEST-UNEXPECTED-FAIL |", end='')
        if num_failures > expected_num_failures:
            print("More out-of-memory errors were found ({0}) than expected ({1}). "
                  "This probably means an allocation site has been added without a "
                  "NULL-check. If this is unavoidable, you can account for it by "
                  "updating Makefile.in.".format(num_failures, expected_num_failures),
                  end='')
        else:
            print("Congratulations, you have removed {0} out-of-memory error(s) "
                  "({1} remain)! Please account for it by updating Makefile.in."
                  .format(expected_num_failures - num_failures, num_failures),
                  end='')
        sys.exit(-1)
    else:
        print('TEST-PASS | find_OOM_errors | Found the expected number of OOM '
              'errors ({0})'.format(expected_num_failures))
352

mercurial