diff -r 000000000000 -r 6474c204b198 tools/page-loader/URLTimingDataSet.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/page-loader/URLTimingDataSet.pm Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,243 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +package URLTimingDataSet; +use DBI; +use PageData; # list of test pages, etc. +use strict; + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = { + dataset => [], + results => [], + sorted => [], + average => undef, + avgmedian => undef, # note: average of individual medians + maximum => undef, + minimum => undef, + }; + $self->{id} = shift || die "No id supplied"; + $self->{table} = shift || "t" . $self->{id}; + $self->{pages} = PageData->new; + bless ($self, $class); + $self->_grok(); + return $self; +} + + +sub _grok { + my $self = shift; + my ($res); + + # select the dataset from the db + $self->_select(); + + for (my $i=0; $i < $self->{pages}->length; $i++) { + my $name = $self->{pages}->name($i); + my $count = 0; + my @times = (); + my $nan = 0; + foreach my $ref (@{$self->{dataset}}) { + next if ($name ne $ref->{content}); + $count++; + if ($ref->{c_part} eq "NaN") { + # we bailed out of this page load + $res = "NaN"; + $nan = 1; + } + else { + my $s_intvl = $ref->{s_intvl}; + my $c_intvl = $ref->{c_intvl}; + my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2); + if ($errval > 0.08) { # one of them went wrong and stalled out (see [1] below) + $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl; + } else { + $res = int(($s_intvl + $c_intvl)/2); + } + } + push @times, $res; + } + + my $avg = int(_avg(@times)); + my $med = _med(@times); + my $max = $nan ? "NaN" : _max(@times); + my $min = _min(@times); + push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ]; + } + + $self->_get_summary(); + $self->_sort_result_set(); + +} + +sub _select { + my $self = shift; + + my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1}) + or die "Cannot connect: " . $DBI::errstr; + + my $sql = qq{ + SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID + FROM $self->{table} + WHERE ID = '$self->{id}' + }; + + my $sth = $dbh->prepare($sql); + $sth->execute(); + + while (my @data = $sth->fetchrow_array()) { + push @{$self->{dataset}}, + {index => $data[0], + s_intvl => $data[1], + c_intvl => $data[2], + c_part => $data[3], + content => $data[4], + id => $data[5] + }; + } + $sth->finish(); + $dbh->disconnect(); +} + +sub _get_summary { + my $self = shift; + my (@avg, @med, @max, @min); + + # how many pages were loaded in total ('sampled') + $self->{samples} = scalar(@{$self->{dataset}}); + + # how many cycles (should I get this from test parameters instead?) + $self->{count} = int(_avg( map($_->[2], @{$self->{results}}) )); + #warn $self->{count}; + + # calculate overall average, average median, maximum, minimum, (RMS Error?) + for (@{$self->{results}}) { + push @avg, $_->[3]; + push @med, $_->[4]; + push @max, $_->[5]; + push @min, $_->[6]; + } + $self->{average} = int(_avg(@avg)); + $self->{avgmedian} = int(_avg(@med)); # note: averaging individual medians + $self->{maximum} = _max(@max); + $self->{minimum} = _min(@min); +} + +sub _sort_result_set { + my $self = shift; + # sort by median load time + # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}}; + # might be "NaN", but this is lame of me to be carrying around a string instead of undef + @{$self->{sorted}} = + sort { + if ($a->[4] eq "NaN" || $b->[4] eq "NaN") { + return $a->[4] cmp $b->[4]; + } else { + return $a->[4] <=> $b->[4]; + } + } @{$self->{results}}; +} + +sub as_string { + my $self = shift; + return $self->_as_string(); +} + +sub as_string_sorted { + my $self = shift; + return $self->_as_string(@{$self->{sorted}}); +} + + +sub _as_string { + my $self = shift; + my @ary = @_ ? @_ : @{$self->{results}}; + my $str; + for (@ary) { + my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_; + $str .= sprintf "%3s %-26s\t", $index, $path; + if ($count > 0) { + $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min; + foreach my $time (@times) { + $str .= sprintf "%6s ", $time; + } + } + $str .= "\n"; + } + return $str; +} + +# +# package internal helper functions +# +sub _num { + my @array = (); + for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; } + return @array; +} + +sub _avg { + my @array = _num(@_); + return "NaN" unless scalar(@array); + my $sum = 0; + for (@array) { $sum += $_; } + return $sum/scalar(@array); +} + +sub _max { + my @array = _num(@_); + return "NaN" unless scalar(@array); + my $max = $array[0]; + for (@array) { $max = ($max > $_) ? $max : $_; } + return $max; +} + +sub _min { + my @array = _num(@_); + return "NaN" unless scalar(@array); + my $min = $array[0]; + for (@array) { $min = ($min < $_) ? $min : $_; } + return $min; +} + +# returns the floor(N/2) element of a sorted ascending array +sub _med { + my @array = _num(@_); + return "NaN" unless scalar(@array); + my $index = int((scalar(@array)-1)/2); + @array = sort {$a <=> $b} @array; + return $array[$index]; +} + +1; # return true + +################################################################################ +# +# [1] in looking at the test results, in almost all cases, the +# round-trip time measured by the server logic and the client logic +# would be almost the same value (which is what one would +# expect). However, on occasion, one of the them would be "out of +# whack", and inconsistent with the additional "layout" measure by the +# client. +# +# i.e., a set of numbers like these: +# c_part c_intvl s_intvl +# 800 1003 997 +# 804 1007 1005 +# 801 1001 1325 <-- +# 803 1318 998 <-- +# 799 1002 1007 +# ... +# +# which looks like the server side would stall in doing the accept or +# in running the mod-perl handler (possibly a GC?). (The following +# c_intvl would then be out of whack by a matching amount on the next +# cycle). +# +# At any rate, since it was clear from comparing with the 'c_part' +# measure, which of the times was bogus, I just use an arbitrary error +# measure to determine when to toss out the "bad" value. +#