Fri, 16 Jan 2015 18:13:44 +0100
Integrate suggestion from review to improve consistency with existing code.
michael@0 | 1 | # |
michael@0 | 2 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 5 | package URLTimingDataSet; |
michael@0 | 6 | use DBI; |
michael@0 | 7 | use PageData; # list of test pages, etc. |
michael@0 | 8 | use strict; |
michael@0 | 9 | |
michael@0 | 10 | sub new { |
michael@0 | 11 | my $proto = shift; |
michael@0 | 12 | my $class = ref($proto) || $proto; |
michael@0 | 13 | my $self = { |
michael@0 | 14 | dataset => [], |
michael@0 | 15 | results => [], |
michael@0 | 16 | sorted => [], |
michael@0 | 17 | average => undef, |
michael@0 | 18 | avgmedian => undef, # note: average of individual medians |
michael@0 | 19 | maximum => undef, |
michael@0 | 20 | minimum => undef, |
michael@0 | 21 | }; |
michael@0 | 22 | $self->{id} = shift || die "No id supplied"; |
michael@0 | 23 | $self->{table} = shift || "t" . $self->{id}; |
michael@0 | 24 | $self->{pages} = PageData->new; |
michael@0 | 25 | bless ($self, $class); |
michael@0 | 26 | $self->_grok(); |
michael@0 | 27 | return $self; |
michael@0 | 28 | } |
michael@0 | 29 | |
michael@0 | 30 | |
michael@0 | 31 | sub _grok { |
michael@0 | 32 | my $self = shift; |
michael@0 | 33 | my ($res); |
michael@0 | 34 | |
michael@0 | 35 | # select the dataset from the db |
michael@0 | 36 | $self->_select(); |
michael@0 | 37 | |
michael@0 | 38 | for (my $i=0; $i < $self->{pages}->length; $i++) { |
michael@0 | 39 | my $name = $self->{pages}->name($i); |
michael@0 | 40 | my $count = 0; |
michael@0 | 41 | my @times = (); |
michael@0 | 42 | my $nan = 0; |
michael@0 | 43 | foreach my $ref (@{$self->{dataset}}) { |
michael@0 | 44 | next if ($name ne $ref->{content}); |
michael@0 | 45 | $count++; |
michael@0 | 46 | if ($ref->{c_part} eq "NaN") { |
michael@0 | 47 | # we bailed out of this page load |
michael@0 | 48 | $res = "NaN"; |
michael@0 | 49 | $nan = 1; |
michael@0 | 50 | } |
michael@0 | 51 | else { |
michael@0 | 52 | my $s_intvl = $ref->{s_intvl}; |
michael@0 | 53 | my $c_intvl = $ref->{c_intvl}; |
michael@0 | 54 | my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2); |
michael@0 | 55 | if ($errval > 0.08) { # one of them went wrong and stalled out (see [1] below) |
michael@0 | 56 | $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl; |
michael@0 | 57 | } else { |
michael@0 | 58 | $res = int(($s_intvl + $c_intvl)/2); |
michael@0 | 59 | } |
michael@0 | 60 | } |
michael@0 | 61 | push @times, $res; |
michael@0 | 62 | } |
michael@0 | 63 | |
michael@0 | 64 | my $avg = int(_avg(@times)); |
michael@0 | 65 | my $med = _med(@times); |
michael@0 | 66 | my $max = $nan ? "NaN" : _max(@times); |
michael@0 | 67 | my $min = _min(@times); |
michael@0 | 68 | push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ]; |
michael@0 | 69 | } |
michael@0 | 70 | |
michael@0 | 71 | $self->_get_summary(); |
michael@0 | 72 | $self->_sort_result_set(); |
michael@0 | 73 | |
michael@0 | 74 | } |
michael@0 | 75 | |
michael@0 | 76 | sub _select { |
michael@0 | 77 | my $self = shift; |
michael@0 | 78 | |
michael@0 | 79 | my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1}) |
michael@0 | 80 | or die "Cannot connect: " . $DBI::errstr; |
michael@0 | 81 | |
michael@0 | 82 | my $sql = qq{ |
michael@0 | 83 | SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID |
michael@0 | 84 | FROM $self->{table} |
michael@0 | 85 | WHERE ID = '$self->{id}' |
michael@0 | 86 | }; |
michael@0 | 87 | |
michael@0 | 88 | my $sth = $dbh->prepare($sql); |
michael@0 | 89 | $sth->execute(); |
michael@0 | 90 | |
michael@0 | 91 | while (my @data = $sth->fetchrow_array()) { |
michael@0 | 92 | push @{$self->{dataset}}, |
michael@0 | 93 | {index => $data[0], |
michael@0 | 94 | s_intvl => $data[1], |
michael@0 | 95 | c_intvl => $data[2], |
michael@0 | 96 | c_part => $data[3], |
michael@0 | 97 | content => $data[4], |
michael@0 | 98 | id => $data[5] |
michael@0 | 99 | }; |
michael@0 | 100 | } |
michael@0 | 101 | $sth->finish(); |
michael@0 | 102 | $dbh->disconnect(); |
michael@0 | 103 | } |
michael@0 | 104 | |
michael@0 | 105 | sub _get_summary { |
michael@0 | 106 | my $self = shift; |
michael@0 | 107 | my (@avg, @med, @max, @min); |
michael@0 | 108 | |
michael@0 | 109 | # how many pages were loaded in total ('sampled') |
michael@0 | 110 | $self->{samples} = scalar(@{$self->{dataset}}); |
michael@0 | 111 | |
michael@0 | 112 | # how many cycles (should I get this from test parameters instead?) |
michael@0 | 113 | $self->{count} = int(_avg( map($_->[2], @{$self->{results}}) )); |
michael@0 | 114 | #warn $self->{count}; |
michael@0 | 115 | |
michael@0 | 116 | # calculate overall average, average median, maximum, minimum, (RMS Error?) |
michael@0 | 117 | for (@{$self->{results}}) { |
michael@0 | 118 | push @avg, $_->[3]; |
michael@0 | 119 | push @med, $_->[4]; |
michael@0 | 120 | push @max, $_->[5]; |
michael@0 | 121 | push @min, $_->[6]; |
michael@0 | 122 | } |
michael@0 | 123 | $self->{average} = int(_avg(@avg)); |
michael@0 | 124 | $self->{avgmedian} = int(_avg(@med)); # note: averaging individual medians |
michael@0 | 125 | $self->{maximum} = _max(@max); |
michael@0 | 126 | $self->{minimum} = _min(@min); |
michael@0 | 127 | } |
michael@0 | 128 | |
michael@0 | 129 | sub _sort_result_set { |
michael@0 | 130 | my $self = shift; |
michael@0 | 131 | # sort by median load time |
michael@0 | 132 | # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}}; |
michael@0 | 133 | # might be "NaN", but this is lame of me to be carrying around a string instead of undef |
michael@0 | 134 | @{$self->{sorted}} = |
michael@0 | 135 | sort { |
michael@0 | 136 | if ($a->[4] eq "NaN" || $b->[4] eq "NaN") { |
michael@0 | 137 | return $a->[4] cmp $b->[4]; |
michael@0 | 138 | } else { |
michael@0 | 139 | return $a->[4] <=> $b->[4]; |
michael@0 | 140 | } |
michael@0 | 141 | } @{$self->{results}}; |
michael@0 | 142 | } |
michael@0 | 143 | |
michael@0 | 144 | sub as_string { |
michael@0 | 145 | my $self = shift; |
michael@0 | 146 | return $self->_as_string(); |
michael@0 | 147 | } |
michael@0 | 148 | |
michael@0 | 149 | sub as_string_sorted { |
michael@0 | 150 | my $self = shift; |
michael@0 | 151 | return $self->_as_string(@{$self->{sorted}}); |
michael@0 | 152 | } |
michael@0 | 153 | |
michael@0 | 154 | |
michael@0 | 155 | sub _as_string { |
michael@0 | 156 | my $self = shift; |
michael@0 | 157 | my @ary = @_ ? @_ : @{$self->{results}}; |
michael@0 | 158 | my $str; |
michael@0 | 159 | for (@ary) { |
michael@0 | 160 | my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_; |
michael@0 | 161 | $str .= sprintf "%3s %-26s\t", $index, $path; |
michael@0 | 162 | if ($count > 0) { |
michael@0 | 163 | $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min; |
michael@0 | 164 | foreach my $time (@times) { |
michael@0 | 165 | $str .= sprintf "%6s ", $time; |
michael@0 | 166 | } |
michael@0 | 167 | } |
michael@0 | 168 | $str .= "\n"; |
michael@0 | 169 | } |
michael@0 | 170 | return $str; |
michael@0 | 171 | } |
michael@0 | 172 | |
michael@0 | 173 | # |
michael@0 | 174 | # package internal helper functions |
michael@0 | 175 | # |
michael@0 | 176 | sub _num { |
michael@0 | 177 | my @array = (); |
michael@0 | 178 | for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; } |
michael@0 | 179 | return @array; |
michael@0 | 180 | } |
michael@0 | 181 | |
michael@0 | 182 | sub _avg { |
michael@0 | 183 | my @array = _num(@_); |
michael@0 | 184 | return "NaN" unless scalar(@array); |
michael@0 | 185 | my $sum = 0; |
michael@0 | 186 | for (@array) { $sum += $_; } |
michael@0 | 187 | return $sum/scalar(@array); |
michael@0 | 188 | } |
michael@0 | 189 | |
michael@0 | 190 | sub _max { |
michael@0 | 191 | my @array = _num(@_); |
michael@0 | 192 | return "NaN" unless scalar(@array); |
michael@0 | 193 | my $max = $array[0]; |
michael@0 | 194 | for (@array) { $max = ($max > $_) ? $max : $_; } |
michael@0 | 195 | return $max; |
michael@0 | 196 | } |
michael@0 | 197 | |
michael@0 | 198 | sub _min { |
michael@0 | 199 | my @array = _num(@_); |
michael@0 | 200 | return "NaN" unless scalar(@array); |
michael@0 | 201 | my $min = $array[0]; |
michael@0 | 202 | for (@array) { $min = ($min < $_) ? $min : $_; } |
michael@0 | 203 | return $min; |
michael@0 | 204 | } |
michael@0 | 205 | |
michael@0 | 206 | # returns the floor(N/2) element of a sorted ascending array |
michael@0 | 207 | sub _med { |
michael@0 | 208 | my @array = _num(@_); |
michael@0 | 209 | return "NaN" unless scalar(@array); |
michael@0 | 210 | my $index = int((scalar(@array)-1)/2); |
michael@0 | 211 | @array = sort {$a <=> $b} @array; |
michael@0 | 212 | return $array[$index]; |
michael@0 | 213 | } |
michael@0 | 214 | |
michael@0 | 215 | 1; # return true |
michael@0 | 216 | |
michael@0 | 217 | ################################################################################ |
michael@0 | 218 | # |
michael@0 | 219 | # [1] in looking at the test results, in almost all cases, the |
michael@0 | 220 | # round-trip time measured by the server logic and the client logic |
michael@0 | 221 | # would be almost the same value (which is what one would |
michael@0 | 222 | # expect). However, on occasion, one of the them would be "out of |
michael@0 | 223 | # whack", and inconsistent with the additional "layout" measure by the |
michael@0 | 224 | # client. |
michael@0 | 225 | # |
michael@0 | 226 | # i.e., a set of numbers like these: |
michael@0 | 227 | # c_part c_intvl s_intvl |
michael@0 | 228 | # 800 1003 997 |
michael@0 | 229 | # 804 1007 1005 |
michael@0 | 230 | # 801 1001 1325 <-- |
michael@0 | 231 | # 803 1318 998 <-- |
michael@0 | 232 | # 799 1002 1007 |
michael@0 | 233 | # ... |
michael@0 | 234 | # |
michael@0 | 235 | # which looks like the server side would stall in doing the accept or |
michael@0 | 236 | # in running the mod-perl handler (possibly a GC?). (The following |
michael@0 | 237 | # c_intvl would then be out of whack by a matching amount on the next |
michael@0 | 238 | # cycle). |
michael@0 | 239 | # |
michael@0 | 240 | # At any rate, since it was clear from comparing with the 'c_part' |
michael@0 | 241 | # measure, which of the times was bogus, I just use an arbitrary error |
michael@0 | 242 | # measure to determine when to toss out the "bad" value. |
michael@0 | 243 | # |