|
1 # |
|
2 # This Source Code Form is subject to the terms of the Mozilla Public |
|
3 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5 package URLTimingDataSet; |
|
6 use DBI; |
|
7 use PageData; # list of test pages, etc. |
|
8 use strict; |
|
9 |
|
10 sub new { |
|
11 my $proto = shift; |
|
12 my $class = ref($proto) || $proto; |
|
13 my $self = { |
|
14 dataset => [], |
|
15 results => [], |
|
16 sorted => [], |
|
17 average => undef, |
|
18 avgmedian => undef, # note: average of individual medians |
|
19 maximum => undef, |
|
20 minimum => undef, |
|
21 }; |
|
22 $self->{id} = shift || die "No id supplied"; |
|
23 $self->{table} = shift || "t" . $self->{id}; |
|
24 $self->{pages} = PageData->new; |
|
25 bless ($self, $class); |
|
26 $self->_grok(); |
|
27 return $self; |
|
28 } |
|
29 |
|
30 |
|
31 sub _grok { |
|
32 my $self = shift; |
|
33 my ($res); |
|
34 |
|
35 # select the dataset from the db |
|
36 $self->_select(); |
|
37 |
|
38 for (my $i=0; $i < $self->{pages}->length; $i++) { |
|
39 my $name = $self->{pages}->name($i); |
|
40 my $count = 0; |
|
41 my @times = (); |
|
42 my $nan = 0; |
|
43 foreach my $ref (@{$self->{dataset}}) { |
|
44 next if ($name ne $ref->{content}); |
|
45 $count++; |
|
46 if ($ref->{c_part} eq "NaN") { |
|
47 # we bailed out of this page load |
|
48 $res = "NaN"; |
|
49 $nan = 1; |
|
50 } |
|
51 else { |
|
52 my $s_intvl = $ref->{s_intvl}; |
|
53 my $c_intvl = $ref->{c_intvl}; |
|
54 my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2); |
|
55 if ($errval > 0.08) { # one of them went wrong and stalled out (see [1] below) |
|
56 $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl; |
|
57 } else { |
|
58 $res = int(($s_intvl + $c_intvl)/2); |
|
59 } |
|
60 } |
|
61 push @times, $res; |
|
62 } |
|
63 |
|
64 my $avg = int(_avg(@times)); |
|
65 my $med = _med(@times); |
|
66 my $max = $nan ? "NaN" : _max(@times); |
|
67 my $min = _min(@times); |
|
68 push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ]; |
|
69 } |
|
70 |
|
71 $self->_get_summary(); |
|
72 $self->_sort_result_set(); |
|
73 |
|
74 } |
|
75 |
|
76 sub _select { |
|
77 my $self = shift; |
|
78 |
|
79 my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1}) |
|
80 or die "Cannot connect: " . $DBI::errstr; |
|
81 |
|
82 my $sql = qq{ |
|
83 SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID |
|
84 FROM $self->{table} |
|
85 WHERE ID = '$self->{id}' |
|
86 }; |
|
87 |
|
88 my $sth = $dbh->prepare($sql); |
|
89 $sth->execute(); |
|
90 |
|
91 while (my @data = $sth->fetchrow_array()) { |
|
92 push @{$self->{dataset}}, |
|
93 {index => $data[0], |
|
94 s_intvl => $data[1], |
|
95 c_intvl => $data[2], |
|
96 c_part => $data[3], |
|
97 content => $data[4], |
|
98 id => $data[5] |
|
99 }; |
|
100 } |
|
101 $sth->finish(); |
|
102 $dbh->disconnect(); |
|
103 } |
|
104 |
|
105 sub _get_summary { |
|
106 my $self = shift; |
|
107 my (@avg, @med, @max, @min); |
|
108 |
|
109 # how many pages were loaded in total ('sampled') |
|
110 $self->{samples} = scalar(@{$self->{dataset}}); |
|
111 |
|
112 # how many cycles (should I get this from test parameters instead?) |
|
113 $self->{count} = int(_avg( map($_->[2], @{$self->{results}}) )); |
|
114 #warn $self->{count}; |
|
115 |
|
116 # calculate overall average, average median, maximum, minimum, (RMS Error?) |
|
117 for (@{$self->{results}}) { |
|
118 push @avg, $_->[3]; |
|
119 push @med, $_->[4]; |
|
120 push @max, $_->[5]; |
|
121 push @min, $_->[6]; |
|
122 } |
|
123 $self->{average} = int(_avg(@avg)); |
|
124 $self->{avgmedian} = int(_avg(@med)); # note: averaging individual medians |
|
125 $self->{maximum} = _max(@max); |
|
126 $self->{minimum} = _min(@min); |
|
127 } |
|
128 |
|
129 sub _sort_result_set { |
|
130 my $self = shift; |
|
131 # sort by median load time |
|
132 # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}}; |
|
133 # might be "NaN", but this is lame of me to be carrying around a string instead of undef |
|
134 @{$self->{sorted}} = |
|
135 sort { |
|
136 if ($a->[4] eq "NaN" || $b->[4] eq "NaN") { |
|
137 return $a->[4] cmp $b->[4]; |
|
138 } else { |
|
139 return $a->[4] <=> $b->[4]; |
|
140 } |
|
141 } @{$self->{results}}; |
|
142 } |
|
143 |
|
144 sub as_string { |
|
145 my $self = shift; |
|
146 return $self->_as_string(); |
|
147 } |
|
148 |
|
149 sub as_string_sorted { |
|
150 my $self = shift; |
|
151 return $self->_as_string(@{$self->{sorted}}); |
|
152 } |
|
153 |
|
154 |
|
155 sub _as_string { |
|
156 my $self = shift; |
|
157 my @ary = @_ ? @_ : @{$self->{results}}; |
|
158 my $str; |
|
159 for (@ary) { |
|
160 my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_; |
|
161 $str .= sprintf "%3s %-26s\t", $index, $path; |
|
162 if ($count > 0) { |
|
163 $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min; |
|
164 foreach my $time (@times) { |
|
165 $str .= sprintf "%6s ", $time; |
|
166 } |
|
167 } |
|
168 $str .= "\n"; |
|
169 } |
|
170 return $str; |
|
171 } |
|
172 |
|
173 # |
|
174 # package internal helper functions |
|
175 # |
|
176 sub _num { |
|
177 my @array = (); |
|
178 for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; } |
|
179 return @array; |
|
180 } |
|
181 |
|
182 sub _avg { |
|
183 my @array = _num(@_); |
|
184 return "NaN" unless scalar(@array); |
|
185 my $sum = 0; |
|
186 for (@array) { $sum += $_; } |
|
187 return $sum/scalar(@array); |
|
188 } |
|
189 |
|
190 sub _max { |
|
191 my @array = _num(@_); |
|
192 return "NaN" unless scalar(@array); |
|
193 my $max = $array[0]; |
|
194 for (@array) { $max = ($max > $_) ? $max : $_; } |
|
195 return $max; |
|
196 } |
|
197 |
|
198 sub _min { |
|
199 my @array = _num(@_); |
|
200 return "NaN" unless scalar(@array); |
|
201 my $min = $array[0]; |
|
202 for (@array) { $min = ($min < $_) ? $min : $_; } |
|
203 return $min; |
|
204 } |
|
205 |
|
206 # returns the floor(N/2) element of a sorted ascending array |
|
207 sub _med { |
|
208 my @array = _num(@_); |
|
209 return "NaN" unless scalar(@array); |
|
210 my $index = int((scalar(@array)-1)/2); |
|
211 @array = sort {$a <=> $b} @array; |
|
212 return $array[$index]; |
|
213 } |
|
214 |
|
215 1; # return true |
|
216 |
|
217 ################################################################################ |
|
218 # |
|
219 # [1] in looking at the test results, in almost all cases, the |
|
220 # round-trip time measured by the server logic and the client logic |
|
221 # would be almost the same value (which is what one would |
|
222 # expect). However, on occasion, one of the them would be "out of |
|
223 # whack", and inconsistent with the additional "layout" measure by the |
|
224 # client. |
|
225 # |
|
226 # i.e., a set of numbers like these: |
|
227 # c_part c_intvl s_intvl |
|
228 # 800 1003 997 |
|
229 # 804 1007 1005 |
|
230 # 801 1001 1325 <-- |
|
231 # 803 1318 998 <-- |
|
232 # 799 1002 1007 |
|
233 # ... |
|
234 # |
|
235 # which looks like the server side would stall in doing the accept or |
|
236 # in running the mod-perl handler (possibly a GC?). (The following |
|
237 # c_intvl would then be out of whack by a matching amount on the next |
|
238 # cycle). |
|
239 # |
|
240 # At any rate, since it was clear from comparing with the 'c_part' |
|
241 # measure, which of the times was bogus, I just use an arbitrary error |
|
242 # measure to determine when to toss out the "bad" value. |
|
243 # |