The Tor Browser: diff tools/page-loader/URLTimingDataSet.pm

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/tools/page-loader/URLTimingDataSet.pm	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,243 @@
     1.4 +# 
     1.5 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.6 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.8 +package URLTimingDataSet;
     1.9 +use DBI;
    1.10 +use PageData;       # list of test pages, etc.
    1.11 +use strict;
    1.12 +
    1.13 +sub new {
    1.14 +    my $proto = shift;
    1.15 +    my $class = ref($proto) || $proto;
    1.16 +    my $self  = {
    1.17 +        dataset   => [],
    1.18 +        results   => [],
    1.19 +        sorted    => [],
    1.20 +        average   => undef,
    1.21 +        avgmedian => undef,    # note: average of individual medians
    1.22 +        maximum   => undef,
    1.23 +        minimum   => undef,
    1.24 +    };
    1.25 +    $self->{id}    = shift || die "No id supplied"; 
    1.26 +    $self->{table} = shift || "t" . $self->{id};
    1.27 +    $self->{pages} = PageData->new;
    1.28 +    bless ($self, $class);
    1.29 +    $self->_grok();
    1.30 +    return $self;
    1.31 +}
    1.32 +
    1.33 +
    1.34 +sub _grok {    
    1.35 +    my $self = shift;
    1.36 +    my ($res);
    1.37 +
    1.38 +    # select the dataset from the db
    1.39 +    $self->_select();
    1.40 +
    1.41 +    for (my $i=0; $i < $self->{pages}->length; $i++) {
    1.42 +        my $name = $self->{pages}->name($i);
    1.43 +        my $count = 0;
    1.44 +        my @times = (); 
    1.45 +        my $nan = 0;
    1.46 +        foreach my $ref (@{$self->{dataset}}) {
    1.47 +            next if ($name ne $ref->{content});
    1.48 +            $count++;
    1.49 +            if ($ref->{c_part} eq "NaN") { 
    1.50 +                # we bailed out of this page load
    1.51 +                $res = "NaN";
    1.52 +                $nan = 1;
    1.53 +            }
    1.54 +            else {
    1.55 +                my $s_intvl = $ref->{s_intvl};
    1.56 +                my $c_intvl = $ref->{c_intvl};
    1.57 +                my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2);
    1.58 +                if ($errval > 0.08) {       # one of them went wrong and stalled out (see [1] below)
    1.59 +                    $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl;
    1.60 +                } else {
    1.61 +                    $res = int(($s_intvl + $c_intvl)/2);
    1.62 +                }
    1.63 +            }
    1.64 +            push @times, $res;
    1.65 +        }
    1.66 +
    1.67 +        my $avg = int(_avg(@times));
    1.68 +        my $med = _med(@times);
    1.69 +        my $max = $nan ? "NaN" : _max(@times);
    1.70 +        my $min = _min(@times);
    1.71 +        push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ];
    1.72 +    }
    1.73 +    
    1.74 +    $self->_get_summary();
    1.75 +    $self->_sort_result_set();
    1.76 +    
    1.77 +}
    1.78 +
    1.79 +sub _select {
    1.80 +    my $self = shift;
    1.81 +
    1.82 +    my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1})
    1.83 +         or die "Cannot connect: " . $DBI::errstr;
    1.84 +
    1.85 +    my $sql = qq{
    1.86 +        SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID 
    1.87 +             FROM $self->{table}
    1.88 +                  WHERE ID = '$self->{id}'
    1.89 +                };
    1.90 + 
    1.91 +    my $sth = $dbh->prepare($sql);
    1.92 +    $sth->execute();
    1.93 +
    1.94 +    while (my @data = $sth->fetchrow_array()) {
    1.95 +        push @{$self->{dataset}}, 
    1.96 +        {index   => $data[0],
    1.97 +         s_intvl => $data[1],
    1.98 +         c_intvl => $data[2],
    1.99 +         c_part  => $data[3],
   1.100 +         content => $data[4],
   1.101 +         id      => $data[5]
   1.102 +          };
   1.103 +    }
   1.104 +    $sth->finish();
   1.105 +    $dbh->disconnect();
   1.106 +}
   1.107 +
   1.108 +sub _get_summary {
   1.109 +    my $self = shift;
   1.110 +    my (@avg, @med, @max, @min);
   1.111 +
   1.112 +    # how many pages were loaded in total ('sampled')
   1.113 +    $self->{samples} = scalar(@{$self->{dataset}}); 
   1.114 +
   1.115 +    # how many cycles (should I get this from test parameters instead?)
   1.116 +    $self->{count} = int(_avg( map($_->[2],  @{$self->{results}}) ));
   1.117 +    #warn $self->{count};
   1.118 +
   1.119 +    # calculate overall average, average median, maximum, minimum, (RMS Error?)
   1.120 +    for (@{$self->{results}}) {
   1.121 +        push @avg, $_->[3];
   1.122 +        push @med, $_->[4];
   1.123 +        push @max, $_->[5];
   1.124 +        push @min, $_->[6];
   1.125 +    }
   1.126 +    $self->{average}   = int(_avg(@avg));
   1.127 +    $self->{avgmedian} = int(_avg(@med));     # note: averaging individual medians
   1.128 +    $self->{maximum}   = _max(@max);
   1.129 +    $self->{minimum}   = _min(@min);
   1.130 +}
   1.131 +
   1.132 +sub _sort_result_set {
   1.133 +    my $self = shift;
   1.134 +    # sort by median load time
   1.135 +    # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}};
   1.136 +    # might be "NaN", but this is lame of me to be carrying around a string instead of undef
   1.137 +    @{$self->{sorted}} = 
   1.138 +         sort {
   1.139 +             if ($a->[4] eq "NaN" || $b->[4] eq "NaN") {
   1.140 +                 return $a->[4] cmp $b->[4]; 
   1.141 +             } else {
   1.142 +                 return $a->[4] <=> $b->[4];
   1.143 +             }
   1.144 +         } @{$self->{results}};
   1.145 +}
   1.146 +
   1.147 +sub as_string {
   1.148 +    my $self = shift;
   1.149 +    return $self->_as_string();
   1.150 +}
   1.151 +
   1.152 +sub as_string_sorted {
   1.153 +    my $self = shift;
   1.154 +    return $self->_as_string(@{$self->{sorted}});
   1.155 +}
   1.156 +
   1.157 +
   1.158 +sub _as_string {
   1.159 +    my $self = shift;
   1.160 +    my @ary = @_ ? @_ : @{$self->{results}};
   1.161 +    my $str;
   1.162 +    for (@ary) {
   1.163 +        my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_;
   1.164 +        $str .= sprintf "%3s %-26s\t", $index, $path;
   1.165 +        if ($count > 0) {
   1.166 +            $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min;
   1.167 +            foreach my $time (@times) {
   1.168 +                $str .= sprintf "%6s ", $time;
   1.169 +            }
   1.170 +        }
   1.171 +        $str .= "\n";
   1.172 +    }
   1.173 +    return $str;
   1.174 +}
   1.175 +    
   1.176 +#
   1.177 +# package internal helper functions
   1.178 +#
   1.179 +sub _num {
   1.180 +    my @array = ();
   1.181 +    for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; }
   1.182 +    return @array;
   1.183 +}
   1.184 +
   1.185 +sub _avg {
   1.186 +    my @array = _num(@_);
   1.187 +    return "NaN" unless scalar(@array);
   1.188 +    my $sum = 0;
   1.189 +    for (@array) { $sum += $_; }
   1.190 +    return $sum/scalar(@array);
   1.191 +}
   1.192 +
   1.193 +sub _max {
   1.194 +    my @array = _num(@_);
   1.195 +    return "NaN" unless scalar(@array);
   1.196 +    my $max = $array[0];
   1.197 +    for (@array) { $max = ($max > $_) ? $max : $_; }
   1.198 +    return $max;
   1.199 +}
   1.200 +
   1.201 +sub _min {
   1.202 +    my @array = _num(@_);
   1.203 +    return "NaN" unless scalar(@array);
   1.204 +    my $min = $array[0];
   1.205 +    for (@array) { $min = ($min < $_) ? $min : $_; }
   1.206 +    return $min;
   1.207 +}
   1.208 +
   1.209 +# returns the floor(N/2) element of a sorted ascending array
   1.210 +sub _med {
   1.211 +    my @array = _num(@_);
   1.212 +    return "NaN" unless scalar(@array);
   1.213 +    my $index = int((scalar(@array)-1)/2);
   1.214 +    @array = sort {$a <=> $b} @array;
   1.215 +    return $array[$index];
   1.216 +}
   1.217 +
   1.218 +1; # return true
   1.219 +
   1.220 +################################################################################
   1.221 +#
   1.222 +# [1] in looking at the test results, in almost all cases, the
   1.223 +# round-trip time measured by the server logic and the client logic
   1.224 +# would be almost the same value (which is what one would
   1.225 +# expect). However, on occasion, one of the them would be "out of
   1.226 +# whack", and inconsistent with the additional "layout" measure by the
   1.227 +# client.
   1.228 +#
   1.229 +#    i.e., a set of numbers like these:
   1.230 +#      c_part     c_intvl      s_intvl
   1.231 +#      800      1003        997
   1.232 +#      804      1007        1005
   1.233 +#      801      1001        1325             <--
   1.234 +#      803      1318        998              <--
   1.235 +#      799      1002        1007
   1.236 +#      ...
   1.237 +#
   1.238 +# which looks like the server side would stall in doing the accept or
   1.239 +# in running the mod-perl handler (possibly a GC?). (The following
   1.240 +# c_intvl would then be out of whack by a matching amount on the next
   1.241 +# cycle).
   1.242 +#
   1.243 +# At any rate, since it was clear from comparing with the 'c_part'
   1.244 +# measure, which of the times was bogus, I just use an arbitrary error
   1.245 +# measure to determine when to toss out the "bad" value.
   1.246 +#
The Tor Browser / file diff

diff: tools/page-loader/URLTimingDataSet.pm

tools/page-loader/URLTimingDataSet.pm