config/find_OOM_errors.py

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 #!/usr/bin/env python
michael@0 2 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 5 from __future__ import print_function
michael@0 6
michael@0 7 usage = """%prog: A test for OOM conditions in the shell.
michael@0 8
michael@0 9 %prog finds segfaults and other errors caused by incorrect handling of
michael@0 10 allocation during OOM (out-of-memory) conditions.
michael@0 11 """
michael@0 12
michael@0 13 help = """Check for regressions only. This runs a set of files with a known
michael@0 14 number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero
michael@0 15 result if more or less errors are found. See js/src/Makefile.in for invocation.
michael@0 16 """
michael@0 17
michael@0 18
michael@0 19 import hashlib
michael@0 20 import re
michael@0 21 import shlex
michael@0 22 import subprocess
michael@0 23 import sys
michael@0 24 import threading
michael@0 25 import time
michael@0 26
michael@0 27 from optparse import OptionParser
michael@0 28
michael@0 29 #####################################################################
michael@0 30 # Utility functions
michael@0 31 #####################################################################
michael@0 32 def run(args, stdin=None):
michael@0 33 class ThreadWorker(threading.Thread):
michael@0 34 def __init__(self, pipe):
michael@0 35 super(ThreadWorker, self).__init__()
michael@0 36 self.all = ""
michael@0 37 self.pipe = pipe
michael@0 38 self.setDaemon(True)
michael@0 39
michael@0 40 def run(self):
michael@0 41 while True:
michael@0 42 line = self.pipe.readline()
michael@0 43 if line == '': break
michael@0 44 else:
michael@0 45 self.all += line
michael@0 46
michael@0 47 try:
michael@0 48 if type(args) == str:
michael@0 49 args = shlex.split(args)
michael@0 50
michael@0 51 args = [str(a) for a in args] # convert to strs
michael@0 52
michael@0 53 stdin_pipe = subprocess.PIPE if stdin else None
michael@0 54 proc = subprocess.Popen(args, stdin=stdin_pipe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
michael@0 55 if stdin_pipe:
michael@0 56 proc.stdin.write(stdin)
michael@0 57 proc.stdin.close()
michael@0 58
michael@0 59 stdout_worker = ThreadWorker(proc.stdout)
michael@0 60 stderr_worker = ThreadWorker(proc.stderr)
michael@0 61 stdout_worker.start()
michael@0 62 stderr_worker.start()
michael@0 63
michael@0 64 proc.wait()
michael@0 65 stdout_worker.join()
michael@0 66 stderr_worker.join()
michael@0 67
michael@0 68 except KeyboardInterrupt as e:
michael@0 69 sys.exit(-1)
michael@0 70
michael@0 71 stdout, stderr = stdout_worker.all, stderr_worker.all
michael@0 72 result = (stdout, stderr, proc.returncode)
michael@0 73 return result
michael@0 74
michael@0 75 def get_js_files():
michael@0 76 (out, err, exit) = run('find ../jit-test/tests -name "*.js"')
michael@0 77 if (err, exit) != ("", 0):
michael@0 78 sys.exit("Wrong directory, run from an objdir")
michael@0 79 return out.split()
michael@0 80
michael@0 81
michael@0 82
michael@0 83 #####################################################################
michael@0 84 # Blacklisting
michael@0 85 #####################################################################
michael@0 86 def in_blacklist(sig):
michael@0 87 return sig in blacklist
michael@0 88
michael@0 89 def add_to_blacklist(sig):
michael@0 90 blacklist[sig] = blacklist.get(sig, 0)
michael@0 91 blacklist[sig] += 1
michael@0 92
michael@0 93 # How often is a particular lines important for this.
michael@0 94 def count_lines():
michael@0 95 """Keep track of the amount of times individual lines occur, in order to
michael@0 96 prioritize the errors which occur most frequently."""
michael@0 97 counts = {}
michael@0 98 for string,count in blacklist.items():
michael@0 99 for line in string.split("\n"):
michael@0 100 counts[line] = counts.get(line, 0) + count
michael@0 101
michael@0 102 lines = []
michael@0 103 for k,v in counts.items():
michael@0 104 lines.append("{0:6}: {1}".format(v, k))
michael@0 105
michael@0 106 lines.sort()
michael@0 107
michael@0 108 countlog = file("../OOM_count_log", "w")
michael@0 109 countlog.write("\n".join(lines))
michael@0 110 countlog.flush()
michael@0 111 countlog.close()
michael@0 112
michael@0 113
michael@0 114 #####################################################################
michael@0 115 # Output cleaning
michael@0 116 #####################################################################
michael@0 117 def clean_voutput(err):
michael@0 118 # Skip what we can't reproduce
michael@0 119 err = re.sub(r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", "", err, flags=re.MULTILINE)
michael@0 120 err = re.sub(r"^==\d+==", "", err, flags=re.MULTILINE)
michael@0 121 err = re.sub(r"^\*\*\d+\*\*", "", err, flags=re.MULTILINE)
michael@0 122 err = re.sub(r"^\s+by 0x[0-9A-Fa-f]+: ", "by: ", err, flags=re.MULTILINE)
michael@0 123 err = re.sub(r"^\s+at 0x[0-9A-Fa-f]+: ", "at: ", err, flags=re.MULTILINE)
michael@0 124 err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2", err, flags=re.MULTILINE)
michael@0 125 err = re.sub(r"(^\s+Invalid write of size )\d+", r"\1x", err, flags=re.MULTILINE)
michael@0 126 err = re.sub(r"(^\s+Invalid read of size )\d+", r"\1x", err, flags=re.MULTILINE)
michael@0 127 err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4", err, flags=re.MULTILINE)
michael@0 128
michael@0 129 # Skip the repeating bit due to the segfault
michael@0 130 lines = []
michael@0 131 for l in err.split('\n'):
michael@0 132 if l == " Process terminating with default action of signal 11 (SIGSEGV)":
michael@0 133 break
michael@0 134 lines.append(l)
michael@0 135 err = '\n'.join(lines)
michael@0 136
michael@0 137 return err
michael@0 138
michael@0 139 def remove_failed_allocation_backtraces(err):
michael@0 140 lines = []
michael@0 141
michael@0 142 add = True
michael@0 143 for l in err.split('\n'):
michael@0 144
michael@0 145 # Set start and end conditions for including text
michael@0 146 if l == " The site of the failed allocation is:":
michael@0 147 add = False
michael@0 148 elif l[:2] not in ['by: ', 'at:']:
michael@0 149 add = True
michael@0 150
michael@0 151 if add:
michael@0 152 lines.append(l)
michael@0 153
michael@0 154
michael@0 155 err = '\n'.join(lines)
michael@0 156
michael@0 157 return err
michael@0 158
michael@0 159
michael@0 160 def clean_output(err):
michael@0 161 err = re.sub(r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: pointer being freed was not allocated\n\*\*\* set a breakppoint in malloc_error_break to debug\n$", "pointer being freed was not allocated", err, flags=re.MULTILINE)
michael@0 162
michael@0 163 return err
michael@0 164
michael@0 165
michael@0 166 #####################################################################
michael@0 167 # Consts, etc
michael@0 168 #####################################################################
michael@0 169
michael@0 170 command_template = 'shell/js' \
michael@0 171 + ' -m -j -p' \
michael@0 172 + ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"' \
michael@0 173 + ' -f ../jit-test/lib/prolog.js' \
michael@0 174 + ' -f {0}'
michael@0 175
michael@0 176
michael@0 177 # Blacklists are things we don't want to see in our logs again (though we do
michael@0 178 # want to count them when they happen). Whitelists we do want to see in our
michael@0 179 # logs again, principally because the information we have isn't enough.
michael@0 180
michael@0 181 blacklist = {}
michael@0 182 add_to_blacklist(r"('', '', 1)") # 1 means OOM if the shell hasn't launched yet.
michael@0 183 add_to_blacklist(r"('', 'out of memory\n', 1)")
michael@0 184
michael@0 185 whitelist = set()
michael@0 186 whitelist.add(r"('', 'out of memory\n', -11)") # -11 means OOM
michael@0 187 whitelist.add(r"('', 'out of memory\nout of memory\n', -11)")
michael@0 188
michael@0 189
michael@0 190
michael@0 191 #####################################################################
michael@0 192 # Program
michael@0 193 #####################################################################
michael@0 194
michael@0 195 # Options
michael@0 196 parser = OptionParser(usage=usage)
michael@0 197 parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help,
michael@0 198 type="int", dest="regression", default=None)
michael@0 199
michael@0 200 (OPTIONS, args) = parser.parse_args()
michael@0 201
michael@0 202
michael@0 203 if OPTIONS.regression != None:
michael@0 204 # TODO: This should be expanded as we get a better hang of the OOM problems.
michael@0 205 # For now, we'll just check that the number of OOMs in one short file does not
michael@0 206 # increase.
michael@0 207 files = ["../jit-test/tests/arguments/args-createontrace.js"]
michael@0 208 else:
michael@0 209 files = get_js_files()
michael@0 210
michael@0 211 # Use a command-line arg to reduce the set of files
michael@0 212 if len (args):
michael@0 213 files = [f for f in files if f.find(args[0]) != -1]
michael@0 214
michael@0 215
michael@0 216 if OPTIONS.regression == None:
michael@0 217 # Don't use a logfile, this is automated for tinderbox.
michael@0 218 log = file("../OOM_log", "w")
michael@0 219
michael@0 220
michael@0 221 num_failures = 0
michael@0 222 for f in files:
michael@0 223
michael@0 224 # Run it once to establish boundaries
michael@0 225 command = (command_template + ' -O').format(f)
michael@0 226 out, err, exit = run(command)
michael@0 227 max = re.match(".*OOM max count: (\d+).*", out, flags=re.DOTALL).groups()[0]
michael@0 228 max = int(max)
michael@0 229
michael@0 230 # OOMs don't recover well for the first 20 allocations or so.
michael@0 231 # TODO: revisit this.
michael@0 232 for i in range(20, max):
michael@0 233
michael@0 234 if OPTIONS.regression == None:
michael@0 235 print("Testing allocation {0}/{1} in {2}".format(i,max,f))
michael@0 236 else:
michael@0 237 sys.stdout.write('.') # something short for tinderbox, no space or \n
michael@0 238
michael@0 239 command = (command_template + ' -A {0}').format(f, i)
michael@0 240 out, err, exit = run(command)
michael@0 241
michael@0 242 # Success (5 is SM's exit code for controlled errors)
michael@0 243 if exit == 5 and err.find("out of memory") != -1:
michael@0 244 continue
michael@0 245
michael@0 246 # Failure
michael@0 247 else:
michael@0 248
michael@0 249 if OPTIONS.regression != None:
michael@0 250 # Just count them
michael@0 251 num_failures += 1
michael@0 252 continue
michael@0 253
michael@0 254 #########################################################################
michael@0 255 # The regression tests ends above. The rest of this is for running the
michael@0 256 # script manually.
michael@0 257 #########################################################################
michael@0 258
michael@0 259 problem = str((out, err, exit))
michael@0 260 if in_blacklist(problem) and problem not in whitelist:
michael@0 261 add_to_blacklist(problem)
michael@0 262 continue
michael@0 263
michael@0 264 add_to_blacklist(problem)
michael@0 265
michael@0 266
michael@0 267 # Get valgrind output for a good stack trace
michael@0 268 vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command
michael@0 269 run(vcommand)
michael@0 270 vout = file("OOM_valgrind_log_file").read()
michael@0 271 vout = clean_voutput(vout)
michael@0 272 sans_alloc_sites = remove_failed_allocation_backtraces(vout)
michael@0 273
michael@0 274 # Don't print duplicate information
michael@0 275 if in_blacklist(sans_alloc_sites):
michael@0 276 add_to_blacklist(sans_alloc_sites)
michael@0 277 continue
michael@0 278
michael@0 279 add_to_blacklist(sans_alloc_sites)
michael@0 280
michael@0 281 log.write ("\n")
michael@0 282 log.write ("\n")
michael@0 283 log.write ("=========================================================================")
michael@0 284 log.write ("\n")
michael@0 285 log.write ("An allocation failure at\n\tallocation {0}/{1} in {2}\n\t"
michael@0 286 "causes problems (detected using bug 624094)"
michael@0 287 .format(i, max, f))
michael@0 288 log.write ("\n")
michael@0 289 log.write ("\n")
michael@0 290
michael@0 291 log.write ("Command (from obj directory, using patch from bug 624094):\n " + command)
michael@0 292 log.write ("\n")
michael@0 293 log.write ("\n")
michael@0 294 log.write ("stdout, stderr, exitcode:\n " + problem)
michael@0 295 log.write ("\n")
michael@0 296 log.write ("\n")
michael@0 297
michael@0 298 double_free = err.find("pointer being freed was not allocated") != -1
michael@0 299 oom_detected = err.find("out of memory") != -1
michael@0 300 multiple_oom_detected = err.find("out of memory\nout of memory") != -1
michael@0 301 segfault_detected = exit == -11
michael@0 302
michael@0 303 log.write ("Diagnosis: ")
michael@0 304 log.write ("\n")
michael@0 305 if multiple_oom_detected:
michael@0 306 log.write (" - Multiple OOMs reported")
michael@0 307 log.write ("\n")
michael@0 308 if segfault_detected:
michael@0 309 log.write (" - segfault")
michael@0 310 log.write ("\n")
michael@0 311 if not oom_detected:
michael@0 312 log.write (" - No OOM checking")
michael@0 313 log.write ("\n")
michael@0 314 if double_free:
michael@0 315 log.write (" - Double free")
michael@0 316 log.write ("\n")
michael@0 317
michael@0 318 log.write ("\n")
michael@0 319
michael@0 320 log.write ("Valgrind info:\n" + vout)
michael@0 321 log.write ("\n")
michael@0 322 log.write ("\n")
michael@0 323 log.flush()
michael@0 324
michael@0 325 if OPTIONS.regression == None:
michael@0 326 count_lines()
michael@0 327
michael@0 328 print()
michael@0 329
michael@0 330 # Do the actual regression check
michael@0 331 if OPTIONS.regression != None:
michael@0 332 expected_num_failures = OPTIONS.regression
michael@0 333
michael@0 334 if num_failures != expected_num_failures:
michael@0 335
michael@0 336 print("TEST-UNEXPECTED-FAIL |", end='')
michael@0 337 if num_failures > expected_num_failures:
michael@0 338 print("More out-of-memory errors were found ({0}) than expected ({1}). "
michael@0 339 "This probably means an allocation site has been added without a "
michael@0 340 "NULL-check. If this is unavoidable, you can account for it by "
michael@0 341 "updating Makefile.in.".format(num_failures, expected_num_failures),
michael@0 342 end='')
michael@0 343 else:
michael@0 344 print("Congratulations, you have removed {0} out-of-memory error(s) "
michael@0 345 "({1} remain)! Please account for it by updating Makefile.in."
michael@0 346 .format(expected_num_failures - num_failures, num_failures),
michael@0 347 end='')
michael@0 348 sys.exit(-1)
michael@0 349 else:
michael@0 350 print('TEST-PASS | find_OOM_errors | Found the expected number of OOM '
michael@0 351 'errors ({0})'.format(expected_num_failures))
michael@0 352

mercurial