tools/performance/diff-talos.py

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Tue, 06 Jan 2015 21:39:09 +0100
branch       TOR_BUG_9701
changeset    8:97036ab72558
permissions  -rwxr-xr-x

Conditionally force memory storage according to privacy.thirdparty.isolate;
this solves Tor bug #9701, complying with the disk avoidance requirement
documented at https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

#!/usr/bin/env python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
This is a simple script that does one thing only: compare talos runs from
two revisions. It is intended to check which of two try runs is better, or
whether a try run improves over the m-c or m-i revision it branches from.

A big design goal is to avoid bit rot and to assert when bit rot is detected.
The set of tests we run is a moving target. When possible this script
should work with any test set, but where it has to hard-code information
it should assert that the information is still valid, so that changes are
detected and fixed early.
"""

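# Typical invocation (a sketch: the two changeset hashes below are
# placeholders, not real revisions):
#
#   ./diff-talos.py 9ecd53f3a954 b6ad23d17462
#
# The first argument is treated as the old (baseline) revision and the
# second as the new revision being evaluated.
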
import json
import urllib2
import math
import sys
from optparse import OptionParser

# FIXME: currently we assert that we know all the benchmarks just so
# we are sure to keep the bigger_is_better set up to date. Is there a
# better way to find/compute it?
bigger_is_better = frozenset(('v8_7', 'dromaeo_dom', 'dromaeo_css'))

smaller_is_better = frozenset(('tdhtmlr_paint', 'tp5n_main_rss_paint',
                               'ts_paint', 'tp5n_paint', 'tsvgr_opacity',
                               'a11yr_paint', 'kraken',
                               'tdhtmlr_nochrome_paint',
                               'tspaint_places_generated_med', 'tpaint',
                               'tp5n_shutdown_paint', 'tsvgr',
                               'tp5n_pbytes_paint', 'tscrollr_paint',
                               'tspaint_places_generated_max',
                               'tp5n_responsiveness_paint',
                               'sunspider', 'tp5n_xres_paint', 'num_ctors',
                               'tresize', 'trobopan', 'tcheckerboard',
                               'tcheck3', 'tcheck2', 'tprovider',
                               'tp5n_modlistbytes_paint',
                               'trace_malloc_maxheap', 'tp4m_nochrome',
                               'trace_malloc_leaks', 'tp4m_main_rss_nochrome',
                               'tp4m_shutdown_nochrome', 'tdhtml_nochrome',
                               'ts_shutdown', 'tp5n_%cpu_paint',
                               'trace_malloc_allocs', 'ts',
                               'tsvg_nochrome', 'tp5n_content_rss_paint',
                               'tp5n_main_startup_fileio_paint',
                               'tp5n_nonmain_normal_netio_paint',
                               'tp5n_nonmain_startup_fileio_paint',
                               'tp5n_main_normal_fileio_paint',
                               'tp5n_nonmain_normal_fileio_paint',
                               'tp5n_main_startup_netio_paint',
                               'tp5n_main_normal_netio_paint',
                               'tp5n_main_shutdown_netio_paint',
                               'tp5n_main_shutdown_fileio_paint'))

all_benchmarks = smaller_is_better | bigger_is_better
assert len(smaller_is_better & bigger_is_better) == 0

def get_raw_data_for_revisions(revisions):
    """Loads data for the revisions; returns an array with one element for
    each revision."""
    selectors = ["revision=%s" % revision for revision in revisions]
    selector = '&'.join(selectors)
    url = "http://graphs.mozilla.org/api/test/runs/revisions?%s" % selector
    url_stream = urllib2.urlopen(url)
    data = json.load(url_stream)
    assert frozenset(data.keys()) == frozenset(('stat', 'revisions'))
    assert data['stat'] == 'ok'
    rev_data = data['revisions']
    assert frozenset(rev_data.keys()) == frozenset(revisions)
    return [rev_data[r] for r in revisions]
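
# For reference, the response consumed above has roughly this shape. It is
# reconstructed from the asserts in this file, so treat it as a sketch
# rather than a spec:
#
#   {"stat": "ok",
#    "revisions": {
#        "<revision>": {
#            "<benchmark>": {
#                "id": ...,
#                "name": ...,
#                "test_runs": {
#                    "<os>": [[test_run_id, build_id, timestamp, mean], ...]
#                }}}}}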

def mean(values):
    return float(sum(values))/len(values)

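# c4(n) below is the standard correction factor that makes the sample
# standard deviation an unbiased estimator for normally distributed data:
#
#   c4(n) = sqrt(2/(n-1)) * gamma(n/2) / gamma((n-1)/2)
#
# For example, c4(2) = sqrt(2/pi) ~= 0.7979, so with only two samples the
# raw estimate is scaled up by roughly 25%.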
def c4(n):
    n = float(n)
    numerator = math.gamma(n/2)*math.sqrt(2/(n-1))
    denominator = math.gamma((n-1)/2)
    return numerator/denominator

def unbiased_standard_deviation(values):
    n = len(values)
    if n == 1:
        return None
    acc = 0
    avg = mean(values)
    for i in values:
        dist = i - avg
        acc += dist * dist
    return math.sqrt(acc/(n-1))/c4(n)

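# Illustrative check of unbiased_standard_deviation (hypothetical values):
# for values = [10, 12] the mean is 11, the n-1 sample standard deviation is
# sqrt(2) ~= 1.414, and dividing by c4(2) ~= 0.7979 yields ~1.77.
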
class BenchmarkResult:
    """Stores the summary (mean and standard deviation) of a set of talos
    runs on the same revision and OS."""
    def __init__(self, avg, std):
        self.avg = avg
        self.std = std
    def __str__(self):
        t = "%s," % self.avg
        return "(%-13s %s)" % (t, self.std)

# FIXME: This function computes the statistics of multiple runs of talos on
# a single revision. Should it also support computing statistics over runs
# of different revisions, assuming the revisions are equivalent from a
# performance perspective?
def digest_revision_data(data):
    ret = {}
    benchmarks = frozenset(data.keys())
    # Assert that all the benchmarks are known. If they are not,
    # smaller_is_better or bigger_is_better needs to be updated, depending
    # on the benchmark type.
    assert all_benchmarks.issuperset(benchmarks), \
        "%s not found in all_benchmarks" % ','.join(benchmarks - all_benchmarks)
    for benchmark in benchmarks:
        benchmark_data = data[benchmark]
        expected_keys = frozenset(("test_runs", "name", "id"))
        assert frozenset(benchmark_data.keys()) == expected_keys
        test_runs = benchmark_data["test_runs"]
        operating_systems = test_runs.keys()
        results = {}
        for os in operating_systems:
            os_runs = test_runs[os]
            values = []
            for os_run in os_runs:
                # There are 4 fields: test run id, build id, timestamp,
                # mean value.
                assert len(os_run) == 4
                values.append(os_run[3])
            avg = mean(values)
            std = unbiased_standard_deviation(values)
            results[os] = BenchmarkResult(avg, std)
        ret[benchmark] = results
    return ret

def get_data_for_revisions(revisions):
    raw_data = get_raw_data_for_revisions(revisions)
    return [digest_revision_data(x) for x in raw_data]

def overlaps(a, b):
    return a[1] >= b[0] and b[1] >= a[0]
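
# overlaps treats a and b as closed intervals [lo, hi]. For example,
# overlaps([1, 3], [2, 4]) is True, while overlaps([1, 2], [3, 4]) is False.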

def is_significant(old, new):
    # Conservative hack: if we don't know, say it is significant.
    if old.std is None or new.std is None:
        return True
    # Require the mean +/- standard deviation intervals to be disjoint, so
    # the means differ by at least the sum of the two standard deviations
    # (roughly a two-sigma separation, about 95% confidence).
    old_interval = [old.avg - old.std, old.avg + old.std]
    new_interval = [new.avg - new.std, new.avg + new.std]
    return not overlaps(old_interval, new_interval)

def compute_difference(benchmark, old, new):
    if benchmark in bigger_is_better:
        new, old = old, new

    if new.avg >= old.avg:
        return "%1.4fx worse" % (new.avg/old.avg)
    else:
        return "%1.4fx better" % (old.avg/new.avg)
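
# Note that the swap above lets the ratio logic always assume smaller is
# better. Example (hypothetical numbers): for a smaller-is-better benchmark
# with old.avg = 100 and new.avg = 110, compute_difference returns
# "1.1000x worse"; with new.avg = 90 it returns "1.1111x better".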

# FIXME: the printing could use a table class that computes the sizes of the
# cells instead of the current hard-coded values.
def print_data_comparison(datav):
    assert len(datav) == 2
    old_data = datav[0]
    new_data = datav[1]
    old_benchmarks = frozenset(old_data.keys())
    new_benchmarks = frozenset(new_data.keys())
    benchmarks = old_benchmarks.intersection(new_benchmarks)
    for benchmark in sorted(benchmarks):
        print benchmark
        old_benchmark_data = old_data[benchmark]
        new_benchmark_data = new_data[benchmark]
        old_operating_systems = frozenset(old_benchmark_data.keys())
        new_operating_systems = frozenset(new_benchmark_data.keys())
        operating_systems = old_operating_systems.intersection(new_operating_systems)
        for os in sorted(operating_systems):
            old_os_data = old_benchmark_data[os]
            new_os_data = new_benchmark_data[os]
            if not is_significant(old_os_data, new_os_data):
                continue

            diff = compute_difference(benchmark, old_os_data, new_os_data)
            print '%-33s | %-30s -> %-30s %s' % \
                (os, old_os_data, new_os_data, diff)
        print

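# Sample output of print_data_comparison (illustrative numbers, spacing
# approximate; tp5n_paint is smaller-is-better, so a drop reads as better):
#
#   tp5n_paint
#   Windows 7                | (512.3,  4.1)       -> (498.7,  3.8)       1.0273x better
#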
def main():
    parser = OptionParser(usage='Usage: %prog old_revision new_revision')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    print_data_comparison(get_data_for_revisions(args))

if __name__ == '__main__':
    main()
