1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/config/find_OOM_errors.py Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,352 @@ 1.4 +#!/usr/bin/env python 1.5 +# This Source Code Form is subject to the terms of the Mozilla Public 1.6 +# License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 1.8 +from __future__ import print_function 1.9 + 1.10 +usage = """%prog: A test for OOM conditions in the shell. 1.11 + 1.12 +%prog finds segfaults and other errors caused by incorrect handling of 1.13 +allocation during OOM (out-of-memory) conditions. 1.14 +""" 1.15 + 1.16 +help = """Check for regressions only. This runs a set of files with a known 1.17 +number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero 1.18 +result if more or less errors are found. See js/src/Makefile.in for invocation. 1.19 +""" 1.20 + 1.21 + 1.22 +import hashlib 1.23 +import re 1.24 +import shlex 1.25 +import subprocess 1.26 +import sys 1.27 +import threading 1.28 +import time 1.29 + 1.30 +from optparse import OptionParser 1.31 + 1.32 +##################################################################### 1.33 +# Utility functions 1.34 +##################################################################### 1.35 +def run(args, stdin=None): 1.36 + class ThreadWorker(threading.Thread): 1.37 + def __init__(self, pipe): 1.38 + super(ThreadWorker, self).__init__() 1.39 + self.all = "" 1.40 + self.pipe = pipe 1.41 + self.setDaemon(True) 1.42 + 1.43 + def run(self): 1.44 + while True: 1.45 + line = self.pipe.readline() 1.46 + if line == '': break 1.47 + else: 1.48 + self.all += line 1.49 + 1.50 + try: 1.51 + if type(args) == str: 1.52 + args = shlex.split(args) 1.53 + 1.54 + args = [str(a) for a in args] # convert to strs 1.55 + 1.56 + stdin_pipe = subprocess.PIPE if stdin else None 1.57 + proc = subprocess.Popen(args, stdin=stdin_pipe, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 1.58 + if stdin_pipe: 1.59 + proc.stdin.write(stdin) 1.60 + proc.stdin.close() 1.61 + 1.62 + stdout_worker = ThreadWorker(proc.stdout) 1.63 + stderr_worker = ThreadWorker(proc.stderr) 1.64 + stdout_worker.start() 1.65 + stderr_worker.start() 1.66 + 1.67 + proc.wait() 1.68 + stdout_worker.join() 1.69 + stderr_worker.join() 1.70 + 1.71 + except KeyboardInterrupt as e: 1.72 + sys.exit(-1) 1.73 + 1.74 + stdout, stderr = stdout_worker.all, stderr_worker.all 1.75 + result = (stdout, stderr, proc.returncode) 1.76 + return result 1.77 + 1.78 +def get_js_files(): 1.79 + (out, err, exit) = run('find ../jit-test/tests -name "*.js"') 1.80 + if (err, exit) != ("", 0): 1.81 + sys.exit("Wrong directory, run from an objdir") 1.82 + return out.split() 1.83 + 1.84 + 1.85 + 1.86 +##################################################################### 1.87 +# Blacklisting 1.88 +##################################################################### 1.89 +def in_blacklist(sig): 1.90 + return sig in blacklist 1.91 + 1.92 +def add_to_blacklist(sig): 1.93 + blacklist[sig] = blacklist.get(sig, 0) 1.94 + blacklist[sig] += 1 1.95 + 1.96 +# How often is a particular lines important for this. 1.97 +def count_lines(): 1.98 + """Keep track of the amount of times individual lines occur, in order to 1.99 + prioritize the errors which occur most frequently.""" 1.100 + counts = {} 1.101 + for string,count in blacklist.items(): 1.102 + for line in string.split("\n"): 1.103 + counts[line] = counts.get(line, 0) + count 1.104 + 1.105 + lines = [] 1.106 + for k,v in counts.items(): 1.107 + lines.append("{0:6}: {1}".format(v, k)) 1.108 + 1.109 + lines.sort() 1.110 + 1.111 + countlog = file("../OOM_count_log", "w") 1.112 + countlog.write("\n".join(lines)) 1.113 + countlog.flush() 1.114 + countlog.close() 1.115 + 1.116 + 1.117 +##################################################################### 1.118 +# Output cleaning 1.119 +##################################################################### 1.120 +def clean_voutput(err): 1.121 + # Skip what we can't reproduce 1.122 + err = re.sub(r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", "", err, flags=re.MULTILINE) 1.123 + err = re.sub(r"^==\d+==", "", err, flags=re.MULTILINE) 1.124 + err = re.sub(r"^\*\*\d+\*\*", "", err, flags=re.MULTILINE) 1.125 + err = re.sub(r"^\s+by 0x[0-9A-Fa-f]+: ", "by: ", err, flags=re.MULTILINE) 1.126 + err = re.sub(r"^\s+at 0x[0-9A-Fa-f]+: ", "at: ", err, flags=re.MULTILINE) 1.127 + err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2", err, flags=re.MULTILINE) 1.128 + err = re.sub(r"(^\s+Invalid write of size )\d+", r"\1x", err, flags=re.MULTILINE) 1.129 + err = re.sub(r"(^\s+Invalid read of size )\d+", r"\1x", err, flags=re.MULTILINE) 1.130 + err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4", err, flags=re.MULTILINE) 1.131 + 1.132 + # Skip the repeating bit due to the segfault 1.133 + lines = [] 1.134 + for l in err.split('\n'): 1.135 + if l == " Process terminating with default action of signal 11 (SIGSEGV)": 1.136 + break 1.137 + lines.append(l) 1.138 + err = '\n'.join(lines) 1.139 + 1.140 + return err 1.141 + 1.142 +def remove_failed_allocation_backtraces(err): 1.143 + lines = [] 1.144 + 1.145 + add = True 1.146 + for l in err.split('\n'): 1.147 + 1.148 + # Set start and end conditions for including text 1.149 + if l == " The site of the failed allocation is:": 1.150 + add = False 1.151 + elif l[:2] not in ['by: ', 'at:']: 1.152 + add = True 1.153 + 1.154 + if add: 1.155 + lines.append(l) 1.156 + 1.157 + 1.158 + err = '\n'.join(lines) 1.159 + 1.160 + return err 1.161 + 1.162 + 1.163 +def clean_output(err): 1.164 + err = re.sub(r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: pointer being freed was not allocated\n\*\*\* set a breakppoint in malloc_error_break to debug\n$", "pointer being freed was not allocated", err, flags=re.MULTILINE) 1.165 + 1.166 + return err 1.167 + 1.168 + 1.169 +##################################################################### 1.170 +# Consts, etc 1.171 +##################################################################### 1.172 + 1.173 +command_template = 'shell/js' \ 1.174 + + ' -m -j -p' \ 1.175 + + ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"' \ 1.176 + + ' -f ../jit-test/lib/prolog.js' \ 1.177 + + ' -f {0}' 1.178 + 1.179 + 1.180 +# Blacklists are things we don't want to see in our logs again (though we do 1.181 +# want to count them when they happen). Whitelists we do want to see in our 1.182 +# logs again, principally because the information we have isn't enough. 1.183 + 1.184 +blacklist = {} 1.185 +add_to_blacklist(r"('', '', 1)") # 1 means OOM if the shell hasn't launched yet. 1.186 +add_to_blacklist(r"('', 'out of memory\n', 1)") 1.187 + 1.188 +whitelist = set() 1.189 +whitelist.add(r"('', 'out of memory\n', -11)") # -11 means OOM 1.190 +whitelist.add(r"('', 'out of memory\nout of memory\n', -11)") 1.191 + 1.192 + 1.193 + 1.194 +##################################################################### 1.195 +# Program 1.196 +##################################################################### 1.197 + 1.198 +# Options 1.199 +parser = OptionParser(usage=usage) 1.200 +parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help, 1.201 + type="int", dest="regression", default=None) 1.202 + 1.203 +(OPTIONS, args) = parser.parse_args() 1.204 + 1.205 + 1.206 +if OPTIONS.regression != None: 1.207 + # TODO: This should be expanded as we get a better hang of the OOM problems. 1.208 + # For now, we'll just check that the number of OOMs in one short file does not 1.209 + # increase. 1.210 + files = ["../jit-test/tests/arguments/args-createontrace.js"] 1.211 +else: 1.212 + files = get_js_files() 1.213 + 1.214 + # Use a command-line arg to reduce the set of files 1.215 + if len (args): 1.216 + files = [f for f in files if f.find(args[0]) != -1] 1.217 + 1.218 + 1.219 +if OPTIONS.regression == None: 1.220 + # Don't use a logfile, this is automated for tinderbox. 1.221 + log = file("../OOM_log", "w") 1.222 + 1.223 + 1.224 +num_failures = 0 1.225 +for f in files: 1.226 + 1.227 + # Run it once to establish boundaries 1.228 + command = (command_template + ' -O').format(f) 1.229 + out, err, exit = run(command) 1.230 + max = re.match(".*OOM max count: (\d+).*", out, flags=re.DOTALL).groups()[0] 1.231 + max = int(max) 1.232 + 1.233 + # OOMs don't recover well for the first 20 allocations or so. 1.234 + # TODO: revisit this. 1.235 + for i in range(20, max): 1.236 + 1.237 + if OPTIONS.regression == None: 1.238 + print("Testing allocation {0}/{1} in {2}".format(i,max,f)) 1.239 + else: 1.240 + sys.stdout.write('.') # something short for tinderbox, no space or \n 1.241 + 1.242 + command = (command_template + ' -A {0}').format(f, i) 1.243 + out, err, exit = run(command) 1.244 + 1.245 + # Success (5 is SM's exit code for controlled errors) 1.246 + if exit == 5 and err.find("out of memory") != -1: 1.247 + continue 1.248 + 1.249 + # Failure 1.250 + else: 1.251 + 1.252 + if OPTIONS.regression != None: 1.253 + # Just count them 1.254 + num_failures += 1 1.255 + continue 1.256 + 1.257 + ######################################################################### 1.258 + # The regression tests ends above. The rest of this is for running the 1.259 + # script manually. 1.260 + ######################################################################### 1.261 + 1.262 + problem = str((out, err, exit)) 1.263 + if in_blacklist(problem) and problem not in whitelist: 1.264 + add_to_blacklist(problem) 1.265 + continue 1.266 + 1.267 + add_to_blacklist(problem) 1.268 + 1.269 + 1.270 + # Get valgrind output for a good stack trace 1.271 + vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command 1.272 + run(vcommand) 1.273 + vout = file("OOM_valgrind_log_file").read() 1.274 + vout = clean_voutput(vout) 1.275 + sans_alloc_sites = remove_failed_allocation_backtraces(vout) 1.276 + 1.277 + # Don't print duplicate information 1.278 + if in_blacklist(sans_alloc_sites): 1.279 + add_to_blacklist(sans_alloc_sites) 1.280 + continue 1.281 + 1.282 + add_to_blacklist(sans_alloc_sites) 1.283 + 1.284 + log.write ("\n") 1.285 + log.write ("\n") 1.286 + log.write ("=========================================================================") 1.287 + log.write ("\n") 1.288 + log.write ("An allocation failure at\n\tallocation {0}/{1} in {2}\n\t" 1.289 + "causes problems (detected using bug 624094)" 1.290 + .format(i, max, f)) 1.291 + log.write ("\n") 1.292 + log.write ("\n") 1.293 + 1.294 + log.write ("Command (from obj directory, using patch from bug 624094):\n " + command) 1.295 + log.write ("\n") 1.296 + log.write ("\n") 1.297 + log.write ("stdout, stderr, exitcode:\n " + problem) 1.298 + log.write ("\n") 1.299 + log.write ("\n") 1.300 + 1.301 + double_free = err.find("pointer being freed was not allocated") != -1 1.302 + oom_detected = err.find("out of memory") != -1 1.303 + multiple_oom_detected = err.find("out of memory\nout of memory") != -1 1.304 + segfault_detected = exit == -11 1.305 + 1.306 + log.write ("Diagnosis: ") 1.307 + log.write ("\n") 1.308 + if multiple_oom_detected: 1.309 + log.write (" - Multiple OOMs reported") 1.310 + log.write ("\n") 1.311 + if segfault_detected: 1.312 + log.write (" - segfault") 1.313 + log.write ("\n") 1.314 + if not oom_detected: 1.315 + log.write (" - No OOM checking") 1.316 + log.write ("\n") 1.317 + if double_free: 1.318 + log.write (" - Double free") 1.319 + log.write ("\n") 1.320 + 1.321 + log.write ("\n") 1.322 + 1.323 + log.write ("Valgrind info:\n" + vout) 1.324 + log.write ("\n") 1.325 + log.write ("\n") 1.326 + log.flush() 1.327 + 1.328 + if OPTIONS.regression == None: 1.329 + count_lines() 1.330 + 1.331 +print() 1.332 + 1.333 +# Do the actual regression check 1.334 +if OPTIONS.regression != None: 1.335 + expected_num_failures = OPTIONS.regression 1.336 + 1.337 + if num_failures != expected_num_failures: 1.338 + 1.339 + print("TEST-UNEXPECTED-FAIL |", end='') 1.340 + if num_failures > expected_num_failures: 1.341 + print("More out-of-memory errors were found ({0}) than expected ({1}). " 1.342 + "This probably means an allocation site has been added without a " 1.343 + "NULL-check. If this is unavoidable, you can account for it by " 1.344 + "updating Makefile.in.".format(num_failures, expected_num_failures), 1.345 + end='') 1.346 + else: 1.347 + print("Congratulations, you have removed {0} out-of-memory error(s) " 1.348 + "({1} remain)! Please account for it by updating Makefile.in." 1.349 + .format(expected_num_failures - num_failures, num_failures), 1.350 + end='') 1.351 + sys.exit(-1) 1.352 + else: 1.353 + print('TEST-PASS | find_OOM_errors | Found the expected number of OOM ' 1.354 + 'errors ({0})'.format(expected_num_failures)) 1.355 +