michael@0: # michael@0: # This Source Code Form is subject to the terms of the Mozilla Public michael@0: # License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: # file, You can obtain one at http://mozilla.org/MPL/2.0/. michael@0: package URLTimingDataSet; michael@0: use DBI; michael@0: use PageData; # list of test pages, etc. michael@0: use strict; michael@0: michael@0: sub new { michael@0: my $proto = shift; michael@0: my $class = ref($proto) || $proto; michael@0: my $self = { michael@0: dataset => [], michael@0: results => [], michael@0: sorted => [], michael@0: average => undef, michael@0: avgmedian => undef, # note: average of individual medians michael@0: maximum => undef, michael@0: minimum => undef, michael@0: }; michael@0: $self->{id} = shift || die "No id supplied"; michael@0: $self->{table} = shift || "t" . $self->{id}; michael@0: $self->{pages} = PageData->new; michael@0: bless ($self, $class); michael@0: $self->_grok(); michael@0: return $self; michael@0: } michael@0: michael@0: michael@0: sub _grok { michael@0: my $self = shift; michael@0: my ($res); michael@0: michael@0: # select the dataset from the db michael@0: $self->_select(); michael@0: michael@0: for (my $i=0; $i < $self->{pages}->length; $i++) { michael@0: my $name = $self->{pages}->name($i); michael@0: my $count = 0; michael@0: my @times = (); michael@0: my $nan = 0; michael@0: foreach my $ref (@{$self->{dataset}}) { michael@0: next if ($name ne $ref->{content}); michael@0: $count++; michael@0: if ($ref->{c_part} eq "NaN") { michael@0: # we bailed out of this page load michael@0: $res = "NaN"; michael@0: $nan = 1; michael@0: } michael@0: else { michael@0: my $s_intvl = $ref->{s_intvl}; michael@0: my $c_intvl = $ref->{c_intvl}; michael@0: my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2); michael@0: if ($errval > 0.08) { # one of them went wrong and stalled out (see [1] below) michael@0: $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl; michael@0: } else { michael@0: $res = int(($s_intvl + $c_intvl)/2); michael@0: } michael@0: } michael@0: push @times, $res; michael@0: } michael@0: michael@0: my $avg = int(_avg(@times)); michael@0: my $med = _med(@times); michael@0: my $max = $nan ? "NaN" : _max(@times); michael@0: my $min = _min(@times); michael@0: push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ]; michael@0: } michael@0: michael@0: $self->_get_summary(); michael@0: $self->_sort_result_set(); michael@0: michael@0: } michael@0: michael@0: sub _select { michael@0: my $self = shift; michael@0: michael@0: my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1}) michael@0: or die "Cannot connect: " . $DBI::errstr; michael@0: michael@0: my $sql = qq{ michael@0: SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID michael@0: FROM $self->{table} michael@0: WHERE ID = '$self->{id}' michael@0: }; michael@0: michael@0: my $sth = $dbh->prepare($sql); michael@0: $sth->execute(); michael@0: michael@0: while (my @data = $sth->fetchrow_array()) { michael@0: push @{$self->{dataset}}, michael@0: {index => $data[0], michael@0: s_intvl => $data[1], michael@0: c_intvl => $data[2], michael@0: c_part => $data[3], michael@0: content => $data[4], michael@0: id => $data[5] michael@0: }; michael@0: } michael@0: $sth->finish(); michael@0: $dbh->disconnect(); michael@0: } michael@0: michael@0: sub _get_summary { michael@0: my $self = shift; michael@0: my (@avg, @med, @max, @min); michael@0: michael@0: # how many pages were loaded in total ('sampled') michael@0: $self->{samples} = scalar(@{$self->{dataset}}); michael@0: michael@0: # how many cycles (should I get this from test parameters instead?) michael@0: $self->{count} = int(_avg( map($_->[2], @{$self->{results}}) )); michael@0: #warn $self->{count}; michael@0: michael@0: # calculate overall average, average median, maximum, minimum, (RMS Error?) michael@0: for (@{$self->{results}}) { michael@0: push @avg, $_->[3]; michael@0: push @med, $_->[4]; michael@0: push @max, $_->[5]; michael@0: push @min, $_->[6]; michael@0: } michael@0: $self->{average} = int(_avg(@avg)); michael@0: $self->{avgmedian} = int(_avg(@med)); # note: averaging individual medians michael@0: $self->{maximum} = _max(@max); michael@0: $self->{minimum} = _min(@min); michael@0: } michael@0: michael@0: sub _sort_result_set { michael@0: my $self = shift; michael@0: # sort by median load time michael@0: # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}}; michael@0: # might be "NaN", but this is lame of me to be carrying around a string instead of undef michael@0: @{$self->{sorted}} = michael@0: sort { michael@0: if ($a->[4] eq "NaN" || $b->[4] eq "NaN") { michael@0: return $a->[4] cmp $b->[4]; michael@0: } else { michael@0: return $a->[4] <=> $b->[4]; michael@0: } michael@0: } @{$self->{results}}; michael@0: } michael@0: michael@0: sub as_string { michael@0: my $self = shift; michael@0: return $self->_as_string(); michael@0: } michael@0: michael@0: sub as_string_sorted { michael@0: my $self = shift; michael@0: return $self->_as_string(@{$self->{sorted}}); michael@0: } michael@0: michael@0: michael@0: sub _as_string { michael@0: my $self = shift; michael@0: my @ary = @_ ? @_ : @{$self->{results}}; michael@0: my $str; michael@0: for (@ary) { michael@0: my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_; michael@0: $str .= sprintf "%3s %-26s\t", $index, $path; michael@0: if ($count > 0) { michael@0: $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min; michael@0: foreach my $time (@times) { michael@0: $str .= sprintf "%6s ", $time; michael@0: } michael@0: } michael@0: $str .= "\n"; michael@0: } michael@0: return $str; michael@0: } michael@0: michael@0: # michael@0: # package internal helper functions michael@0: # michael@0: sub _num { michael@0: my @array = (); michael@0: for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; } michael@0: return @array; michael@0: } michael@0: michael@0: sub _avg { michael@0: my @array = _num(@_); michael@0: return "NaN" unless scalar(@array); michael@0: my $sum = 0; michael@0: for (@array) { $sum += $_; } michael@0: return $sum/scalar(@array); michael@0: } michael@0: michael@0: sub _max { michael@0: my @array = _num(@_); michael@0: return "NaN" unless scalar(@array); michael@0: my $max = $array[0]; michael@0: for (@array) { $max = ($max > $_) ? $max : $_; } michael@0: return $max; michael@0: } michael@0: michael@0: sub _min { michael@0: my @array = _num(@_); michael@0: return "NaN" unless scalar(@array); michael@0: my $min = $array[0]; michael@0: for (@array) { $min = ($min < $_) ? $min : $_; } michael@0: return $min; michael@0: } michael@0: michael@0: # returns the floor(N/2) element of a sorted ascending array michael@0: sub _med { michael@0: my @array = _num(@_); michael@0: return "NaN" unless scalar(@array); michael@0: my $index = int((scalar(@array)-1)/2); michael@0: @array = sort {$a <=> $b} @array; michael@0: return $array[$index]; michael@0: } michael@0: michael@0: 1; # return true michael@0: michael@0: ################################################################################ michael@0: # michael@0: # [1] in looking at the test results, in almost all cases, the michael@0: # round-trip time measured by the server logic and the client logic michael@0: # would be almost the same value (which is what one would michael@0: # expect). However, on occasion, one of the them would be "out of michael@0: # whack", and inconsistent with the additional "layout" measure by the michael@0: # client. michael@0: # michael@0: # i.e., a set of numbers like these: michael@0: # c_part c_intvl s_intvl michael@0: # 800 1003 997 michael@0: # 804 1007 1005 michael@0: # 801 1001 1325 <-- michael@0: # 803 1318 998 <-- michael@0: # 799 1002 1007 michael@0: # ... michael@0: # michael@0: # which looks like the server side would stall in doing the accept or michael@0: # in running the mod-perl handler (possibly a GC?). (The following michael@0: # c_intvl would then be out of whack by a matching amount on the next michael@0: # cycle). michael@0: # michael@0: # At any rate, since it was clear from comparing with the 'c_part' michael@0: # measure, which of the times was bogus, I just use an arbitrary error michael@0: # measure to determine when to toss out the "bad" value. michael@0: #