tools/page-loader/URLTimingDataSet.pm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 #
michael@0 2 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 5 package URLTimingDataSet;
michael@0 6 use DBI;
michael@0 7 use PageData; # list of test pages, etc.
michael@0 8 use strict;
michael@0 9
michael@0 10 sub new {
michael@0 11 my $proto = shift;
michael@0 12 my $class = ref($proto) || $proto;
michael@0 13 my $self = {
michael@0 14 dataset => [],
michael@0 15 results => [],
michael@0 16 sorted => [],
michael@0 17 average => undef,
michael@0 18 avgmedian => undef, # note: average of individual medians
michael@0 19 maximum => undef,
michael@0 20 minimum => undef,
michael@0 21 };
michael@0 22 $self->{id} = shift || die "No id supplied";
michael@0 23 $self->{table} = shift || "t" . $self->{id};
michael@0 24 $self->{pages} = PageData->new;
michael@0 25 bless ($self, $class);
michael@0 26 $self->_grok();
michael@0 27 return $self;
michael@0 28 }
michael@0 29
michael@0 30
michael@0 31 sub _grok {
michael@0 32 my $self = shift;
michael@0 33 my ($res);
michael@0 34
michael@0 35 # select the dataset from the db
michael@0 36 $self->_select();
michael@0 37
michael@0 38 for (my $i=0; $i < $self->{pages}->length; $i++) {
michael@0 39 my $name = $self->{pages}->name($i);
michael@0 40 my $count = 0;
michael@0 41 my @times = ();
michael@0 42 my $nan = 0;
michael@0 43 foreach my $ref (@{$self->{dataset}}) {
michael@0 44 next if ($name ne $ref->{content});
michael@0 45 $count++;
michael@0 46 if ($ref->{c_part} eq "NaN") {
michael@0 47 # we bailed out of this page load
michael@0 48 $res = "NaN";
michael@0 49 $nan = 1;
michael@0 50 }
michael@0 51 else {
michael@0 52 my $s_intvl = $ref->{s_intvl};
michael@0 53 my $c_intvl = $ref->{c_intvl};
michael@0 54 my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2);
michael@0 55 if ($errval > 0.08) { # one of them went wrong and stalled out (see [1] below)
michael@0 56 $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl;
michael@0 57 } else {
michael@0 58 $res = int(($s_intvl + $c_intvl)/2);
michael@0 59 }
michael@0 60 }
michael@0 61 push @times, $res;
michael@0 62 }
michael@0 63
michael@0 64 my $avg = int(_avg(@times));
michael@0 65 my $med = _med(@times);
michael@0 66 my $max = $nan ? "NaN" : _max(@times);
michael@0 67 my $min = _min(@times);
michael@0 68 push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ];
michael@0 69 }
michael@0 70
michael@0 71 $self->_get_summary();
michael@0 72 $self->_sort_result_set();
michael@0 73
michael@0 74 }
michael@0 75
michael@0 76 sub _select {
michael@0 77 my $self = shift;
michael@0 78
michael@0 79 my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1})
michael@0 80 or die "Cannot connect: " . $DBI::errstr;
michael@0 81
michael@0 82 my $sql = qq{
michael@0 83 SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID
michael@0 84 FROM $self->{table}
michael@0 85 WHERE ID = '$self->{id}'
michael@0 86 };
michael@0 87
michael@0 88 my $sth = $dbh->prepare($sql);
michael@0 89 $sth->execute();
michael@0 90
michael@0 91 while (my @data = $sth->fetchrow_array()) {
michael@0 92 push @{$self->{dataset}},
michael@0 93 {index => $data[0],
michael@0 94 s_intvl => $data[1],
michael@0 95 c_intvl => $data[2],
michael@0 96 c_part => $data[3],
michael@0 97 content => $data[4],
michael@0 98 id => $data[5]
michael@0 99 };
michael@0 100 }
michael@0 101 $sth->finish();
michael@0 102 $dbh->disconnect();
michael@0 103 }
michael@0 104
michael@0 105 sub _get_summary {
michael@0 106 my $self = shift;
michael@0 107 my (@avg, @med, @max, @min);
michael@0 108
michael@0 109 # how many pages were loaded in total ('sampled')
michael@0 110 $self->{samples} = scalar(@{$self->{dataset}});
michael@0 111
michael@0 112 # how many cycles (should I get this from test parameters instead?)
michael@0 113 $self->{count} = int(_avg( map($_->[2], @{$self->{results}}) ));
michael@0 114 #warn $self->{count};
michael@0 115
michael@0 116 # calculate overall average, average median, maximum, minimum, (RMS Error?)
michael@0 117 for (@{$self->{results}}) {
michael@0 118 push @avg, $_->[3];
michael@0 119 push @med, $_->[4];
michael@0 120 push @max, $_->[5];
michael@0 121 push @min, $_->[6];
michael@0 122 }
michael@0 123 $self->{average} = int(_avg(@avg));
michael@0 124 $self->{avgmedian} = int(_avg(@med)); # note: averaging individual medians
michael@0 125 $self->{maximum} = _max(@max);
michael@0 126 $self->{minimum} = _min(@min);
michael@0 127 }
michael@0 128
michael@0 129 sub _sort_result_set {
michael@0 130 my $self = shift;
michael@0 131 # sort by median load time
michael@0 132 # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}};
michael@0 133 # might be "NaN", but this is lame of me to be carrying around a string instead of undef
michael@0 134 @{$self->{sorted}} =
michael@0 135 sort {
michael@0 136 if ($a->[4] eq "NaN" || $b->[4] eq "NaN") {
michael@0 137 return $a->[4] cmp $b->[4];
michael@0 138 } else {
michael@0 139 return $a->[4] <=> $b->[4];
michael@0 140 }
michael@0 141 } @{$self->{results}};
michael@0 142 }
michael@0 143
michael@0 144 sub as_string {
michael@0 145 my $self = shift;
michael@0 146 return $self->_as_string();
michael@0 147 }
michael@0 148
michael@0 149 sub as_string_sorted {
michael@0 150 my $self = shift;
michael@0 151 return $self->_as_string(@{$self->{sorted}});
michael@0 152 }
michael@0 153
michael@0 154
michael@0 155 sub _as_string {
michael@0 156 my $self = shift;
michael@0 157 my @ary = @_ ? @_ : @{$self->{results}};
michael@0 158 my $str;
michael@0 159 for (@ary) {
michael@0 160 my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_;
michael@0 161 $str .= sprintf "%3s %-26s\t", $index, $path;
michael@0 162 if ($count > 0) {
michael@0 163 $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min;
michael@0 164 foreach my $time (@times) {
michael@0 165 $str .= sprintf "%6s ", $time;
michael@0 166 }
michael@0 167 }
michael@0 168 $str .= "\n";
michael@0 169 }
michael@0 170 return $str;
michael@0 171 }
michael@0 172
michael@0 173 #
michael@0 174 # package internal helper functions
michael@0 175 #
michael@0 176 sub _num {
michael@0 177 my @array = ();
michael@0 178 for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; }
michael@0 179 return @array;
michael@0 180 }
michael@0 181
michael@0 182 sub _avg {
michael@0 183 my @array = _num(@_);
michael@0 184 return "NaN" unless scalar(@array);
michael@0 185 my $sum = 0;
michael@0 186 for (@array) { $sum += $_; }
michael@0 187 return $sum/scalar(@array);
michael@0 188 }
michael@0 189
michael@0 190 sub _max {
michael@0 191 my @array = _num(@_);
michael@0 192 return "NaN" unless scalar(@array);
michael@0 193 my $max = $array[0];
michael@0 194 for (@array) { $max = ($max > $_) ? $max : $_; }
michael@0 195 return $max;
michael@0 196 }
michael@0 197
michael@0 198 sub _min {
michael@0 199 my @array = _num(@_);
michael@0 200 return "NaN" unless scalar(@array);
michael@0 201 my $min = $array[0];
michael@0 202 for (@array) { $min = ($min < $_) ? $min : $_; }
michael@0 203 return $min;
michael@0 204 }
michael@0 205
michael@0 206 # returns the floor(N/2) element of a sorted ascending array
michael@0 207 sub _med {
michael@0 208 my @array = _num(@_);
michael@0 209 return "NaN" unless scalar(@array);
michael@0 210 my $index = int((scalar(@array)-1)/2);
michael@0 211 @array = sort {$a <=> $b} @array;
michael@0 212 return $array[$index];
michael@0 213 }
michael@0 214
michael@0 215 1; # return true
michael@0 216
michael@0 217 ################################################################################
michael@0 218 #
michael@0 219 # [1] in looking at the test results, in almost all cases, the
michael@0 220 # round-trip time measured by the server logic and the client logic
michael@0 221 # would be almost the same value (which is what one would
michael@0 222 # expect). However, on occasion, one of the them would be "out of
michael@0 223 # whack", and inconsistent with the additional "layout" measure by the
michael@0 224 # client.
michael@0 225 #
michael@0 226 # i.e., a set of numbers like these:
michael@0 227 # c_part c_intvl s_intvl
michael@0 228 # 800 1003 997
michael@0 229 # 804 1007 1005
michael@0 230 # 801 1001 1325 <--
michael@0 231 # 803 1318 998 <--
michael@0 232 # 799 1002 1007
michael@0 233 # ...
michael@0 234 #
michael@0 235 # which looks like the server side would stall in doing the accept or
michael@0 236 # in running the mod-perl handler (possibly a GC?). (The following
michael@0 237 # c_intvl would then be out of whack by a matching amount on the next
michael@0 238 # cycle).
michael@0 239 #
michael@0 240 # At any rate, since it was clear from comparing with the 'c_part'
michael@0 241 # measure, which of the times was bogus, I just use an arbitrary error
michael@0 242 # measure to determine when to toss out the "bad" value.
michael@0 243 #

mercurial