1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/page-loader/URLTimingDataSet.pm Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,243 @@ 1.4 +# 1.5 +# This Source Code Form is subject to the terms of the Mozilla Public 1.6 +# License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 1.8 +package URLTimingDataSet; 1.9 +use DBI; 1.10 +use PageData; # list of test pages, etc. 1.11 +use strict; 1.12 + 1.13 +sub new { 1.14 + my $proto = shift; 1.15 + my $class = ref($proto) || $proto; 1.16 + my $self = { 1.17 + dataset => [], 1.18 + results => [], 1.19 + sorted => [], 1.20 + average => undef, 1.21 + avgmedian => undef, # note: average of individual medians 1.22 + maximum => undef, 1.23 + minimum => undef, 1.24 + }; 1.25 + $self->{id} = shift || die "No id supplied"; 1.26 + $self->{table} = shift || "t" . $self->{id}; 1.27 + $self->{pages} = PageData->new; 1.28 + bless ($self, $class); 1.29 + $self->_grok(); 1.30 + return $self; 1.31 +} 1.32 + 1.33 + 1.34 +sub _grok { 1.35 + my $self = shift; 1.36 + my ($res); 1.37 + 1.38 + # select the dataset from the db 1.39 + $self->_select(); 1.40 + 1.41 + for (my $i=0; $i < $self->{pages}->length; $i++) { 1.42 + my $name = $self->{pages}->name($i); 1.43 + my $count = 0; 1.44 + my @times = (); 1.45 + my $nan = 0; 1.46 + foreach my $ref (@{$self->{dataset}}) { 1.47 + next if ($name ne $ref->{content}); 1.48 + $count++; 1.49 + if ($ref->{c_part} eq "NaN") { 1.50 + # we bailed out of this page load 1.51 + $res = "NaN"; 1.52 + $nan = 1; 1.53 + } 1.54 + else { 1.55 + my $s_intvl = $ref->{s_intvl}; 1.56 + my $c_intvl = $ref->{c_intvl}; 1.57 + my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2); 1.58 + if ($errval > 0.08) { # one of them went wrong and stalled out (see [1] below) 1.59 + $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl; 1.60 + } else { 1.61 + $res = int(($s_intvl + $c_intvl)/2); 1.62 + } 1.63 + } 1.64 + push @times, $res; 1.65 + } 1.66 + 1.67 + my $avg = int(_avg(@times)); 1.68 + my $med = _med(@times); 1.69 + my $max = $nan ? "NaN" : _max(@times); 1.70 + my $min = _min(@times); 1.71 + push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ]; 1.72 + } 1.73 + 1.74 + $self->_get_summary(); 1.75 + $self->_sort_result_set(); 1.76 + 1.77 +} 1.78 + 1.79 +sub _select { 1.80 + my $self = shift; 1.81 + 1.82 + my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1}) 1.83 + or die "Cannot connect: " . $DBI::errstr; 1.84 + 1.85 + my $sql = qq{ 1.86 + SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID 1.87 + FROM $self->{table} 1.88 + WHERE ID = '$self->{id}' 1.89 + }; 1.90 + 1.91 + my $sth = $dbh->prepare($sql); 1.92 + $sth->execute(); 1.93 + 1.94 + while (my @data = $sth->fetchrow_array()) { 1.95 + push @{$self->{dataset}}, 1.96 + {index => $data[0], 1.97 + s_intvl => $data[1], 1.98 + c_intvl => $data[2], 1.99 + c_part => $data[3], 1.100 + content => $data[4], 1.101 + id => $data[5] 1.102 + }; 1.103 + } 1.104 + $sth->finish(); 1.105 + $dbh->disconnect(); 1.106 +} 1.107 + 1.108 +sub _get_summary { 1.109 + my $self = shift; 1.110 + my (@avg, @med, @max, @min); 1.111 + 1.112 + # how many pages were loaded in total ('sampled') 1.113 + $self->{samples} = scalar(@{$self->{dataset}}); 1.114 + 1.115 + # how many cycles (should I get this from test parameters instead?) 1.116 + $self->{count} = int(_avg( map($_->[2], @{$self->{results}}) )); 1.117 + #warn $self->{count}; 1.118 + 1.119 + # calculate overall average, average median, maximum, minimum, (RMS Error?) 1.120 + for (@{$self->{results}}) { 1.121 + push @avg, $_->[3]; 1.122 + push @med, $_->[4]; 1.123 + push @max, $_->[5]; 1.124 + push @min, $_->[6]; 1.125 + } 1.126 + $self->{average} = int(_avg(@avg)); 1.127 + $self->{avgmedian} = int(_avg(@med)); # note: averaging individual medians 1.128 + $self->{maximum} = _max(@max); 1.129 + $self->{minimum} = _min(@min); 1.130 +} 1.131 + 1.132 +sub _sort_result_set { 1.133 + my $self = shift; 1.134 + # sort by median load time 1.135 + # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}}; 1.136 + # might be "NaN", but this is lame of me to be carrying around a string instead of undef 1.137 + @{$self->{sorted}} = 1.138 + sort { 1.139 + if ($a->[4] eq "NaN" || $b->[4] eq "NaN") { 1.140 + return $a->[4] cmp $b->[4]; 1.141 + } else { 1.142 + return $a->[4] <=> $b->[4]; 1.143 + } 1.144 + } @{$self->{results}}; 1.145 +} 1.146 + 1.147 +sub as_string { 1.148 + my $self = shift; 1.149 + return $self->_as_string(); 1.150 +} 1.151 + 1.152 +sub as_string_sorted { 1.153 + my $self = shift; 1.154 + return $self->_as_string(@{$self->{sorted}}); 1.155 +} 1.156 + 1.157 + 1.158 +sub _as_string { 1.159 + my $self = shift; 1.160 + my @ary = @_ ? @_ : @{$self->{results}}; 1.161 + my $str; 1.162 + for (@ary) { 1.163 + my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_; 1.164 + $str .= sprintf "%3s %-26s\t", $index, $path; 1.165 + if ($count > 0) { 1.166 + $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min; 1.167 + foreach my $time (@times) { 1.168 + $str .= sprintf "%6s ", $time; 1.169 + } 1.170 + } 1.171 + $str .= "\n"; 1.172 + } 1.173 + return $str; 1.174 +} 1.175 + 1.176 +# 1.177 +# package internal helper functions 1.178 +# 1.179 +sub _num { 1.180 + my @array = (); 1.181 + for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; } 1.182 + return @array; 1.183 +} 1.184 + 1.185 +sub _avg { 1.186 + my @array = _num(@_); 1.187 + return "NaN" unless scalar(@array); 1.188 + my $sum = 0; 1.189 + for (@array) { $sum += $_; } 1.190 + return $sum/scalar(@array); 1.191 +} 1.192 + 1.193 +sub _max { 1.194 + my @array = _num(@_); 1.195 + return "NaN" unless scalar(@array); 1.196 + my $max = $array[0]; 1.197 + for (@array) { $max = ($max > $_) ? $max : $_; } 1.198 + return $max; 1.199 +} 1.200 + 1.201 +sub _min { 1.202 + my @array = _num(@_); 1.203 + return "NaN" unless scalar(@array); 1.204 + my $min = $array[0]; 1.205 + for (@array) { $min = ($min < $_) ? $min : $_; } 1.206 + return $min; 1.207 +} 1.208 + 1.209 +# returns the floor(N/2) element of a sorted ascending array 1.210 +sub _med { 1.211 + my @array = _num(@_); 1.212 + return "NaN" unless scalar(@array); 1.213 + my $index = int((scalar(@array)-1)/2); 1.214 + @array = sort {$a <=> $b} @array; 1.215 + return $array[$index]; 1.216 +} 1.217 + 1.218 +1; # return true 1.219 + 1.220 +################################################################################ 1.221 +# 1.222 +# [1] in looking at the test results, in almost all cases, the 1.223 +# round-trip time measured by the server logic and the client logic 1.224 +# would be almost the same value (which is what one would 1.225 +# expect). However, on occasion, one of the them would be "out of 1.226 +# whack", and inconsistent with the additional "layout" measure by the 1.227 +# client. 1.228 +# 1.229 +# i.e., a set of numbers like these: 1.230 +# c_part c_intvl s_intvl 1.231 +# 800 1003 997 1.232 +# 804 1007 1005 1.233 +# 801 1001 1325 <-- 1.234 +# 803 1318 998 <-- 1.235 +# 799 1002 1007 1.236 +# ... 1.237 +# 1.238 +# which looks like the server side would stall in doing the accept or 1.239 +# in running the mod-perl handler (possibly a GC?). (The following 1.240 +# c_intvl would then be out of whack by a matching amount on the next 1.241 +# cycle). 1.242 +# 1.243 +# At any rate, since it was clear from comparing with the 'c_part' 1.244 +# measure, which of the times was bogus, I just use an arbitrary error 1.245 +# measure to determine when to toss out the "bad" value. 1.246 +#