tools/page-loader/URLTimingDataSet.pm

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 # 
     2 # This Source Code Form is subject to the terms of the Mozilla Public
     3 # License, v. 2.0. If a copy of the MPL was not distributed with this
     4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 package URLTimingDataSet;
     6 use DBI;
     7 use PageData;       # list of test pages, etc.
     8 use strict;
    10 sub new {
    11     my $proto = shift;
    12     my $class = ref($proto) || $proto;
    13     my $self  = {
    14         dataset   => [],
    15         results   => [],
    16         sorted    => [],
    17         average   => undef,
    18         avgmedian => undef,    # note: average of individual medians
    19         maximum   => undef,
    20         minimum   => undef,
    21     };
    22     $self->{id}    = shift || die "No id supplied"; 
    23     $self->{table} = shift || "t" . $self->{id};
    24     $self->{pages} = PageData->new;
    25     bless ($self, $class);
    26     $self->_grok();
    27     return $self;
    28 }
    31 sub _grok {    
    32     my $self = shift;
    33     my ($res);
    35     # select the dataset from the db
    36     $self->_select();
    38     for (my $i=0; $i < $self->{pages}->length; $i++) {
    39         my $name = $self->{pages}->name($i);
    40         my $count = 0;
    41         my @times = (); 
    42         my $nan = 0;
    43         foreach my $ref (@{$self->{dataset}}) {
    44             next if ($name ne $ref->{content});
    45             $count++;
    46             if ($ref->{c_part} eq "NaN") { 
    47                 # we bailed out of this page load
    48                 $res = "NaN";
    49                 $nan = 1;
    50             }
    51             else {
    52                 my $s_intvl = $ref->{s_intvl};
    53                 my $c_intvl = $ref->{c_intvl};
    54                 my $errval = abs($s_intvl-$c_intvl)/(($s_intvl+$c_intvl)/2);
    55                 if ($errval > 0.08) {       # one of them went wrong and stalled out (see [1] below)
    56                     $res = ($s_intvl <= $c_intvl) ? $s_intvl : $c_intvl;
    57                 } else {
    58                     $res = int(($s_intvl + $c_intvl)/2);
    59                 }
    60             }
    61             push @times, $res;
    62         }
    64         my $avg = int(_avg(@times));
    65         my $med = _med(@times);
    66         my $max = $nan ? "NaN" : _max(@times);
    67         my $min = _min(@times);
    68         push @{$self->{results}}, [ $i, $name, $count, $avg, $med, $max, $min, @times ];
    69     }
    71     $self->_get_summary();
    72     $self->_sort_result_set();
    74 }
    76 sub _select {
    77     my $self = shift;
    79     my $dbh = DBI->connect("DBI:CSV:f_dir=./db", {RaiseError => 1, AutoCommit => 1})
    80          or die "Cannot connect: " . $DBI::errstr;
    82     my $sql = qq{
    83         SELECT INDEX, S_INTVL, C_INTVL, C_PART, CONTENT, ID 
    84              FROM $self->{table}
    85                   WHERE ID = '$self->{id}'
    86                 };
    88     my $sth = $dbh->prepare($sql);
    89     $sth->execute();
    91     while (my @data = $sth->fetchrow_array()) {
    92         push @{$self->{dataset}}, 
    93         {index   => $data[0],
    94          s_intvl => $data[1],
    95          c_intvl => $data[2],
    96          c_part  => $data[3],
    97          content => $data[4],
    98          id      => $data[5]
    99           };
   100     }
   101     $sth->finish();
   102     $dbh->disconnect();
   103 }
   105 sub _get_summary {
   106     my $self = shift;
   107     my (@avg, @med, @max, @min);
   109     # how many pages were loaded in total ('sampled')
   110     $self->{samples} = scalar(@{$self->{dataset}}); 
   112     # how many cycles (should I get this from test parameters instead?)
   113     $self->{count} = int(_avg( map($_->[2],  @{$self->{results}}) ));
   114     #warn $self->{count};
   116     # calculate overall average, average median, maximum, minimum, (RMS Error?)
   117     for (@{$self->{results}}) {
   118         push @avg, $_->[3];
   119         push @med, $_->[4];
   120         push @max, $_->[5];
   121         push @min, $_->[6];
   122     }
   123     $self->{average}   = int(_avg(@avg));
   124     $self->{avgmedian} = int(_avg(@med));     # note: averaging individual medians
   125     $self->{maximum}   = _max(@max);
   126     $self->{minimum}   = _min(@min);
   127 }
   129 sub _sort_result_set {
   130     my $self = shift;
   131     # sort by median load time
   132     # @{$self->{sorted}} = sort {$a->[4] <=> $b->[4]} @{$self->{results}};
   133     # might be "NaN", but this is lame of me to be carrying around a string instead of undef
   134     @{$self->{sorted}} = 
   135          sort {
   136              if ($a->[4] eq "NaN" || $b->[4] eq "NaN") {
   137                  return $a->[4] cmp $b->[4]; 
   138              } else {
   139                  return $a->[4] <=> $b->[4];
   140              }
   141          } @{$self->{results}};
   142 }
   144 sub as_string {
   145     my $self = shift;
   146     return $self->_as_string();
   147 }
   149 sub as_string_sorted {
   150     my $self = shift;
   151     return $self->_as_string(@{$self->{sorted}});
   152 }
   155 sub _as_string {
   156     my $self = shift;
   157     my @ary = @_ ? @_ : @{$self->{results}};
   158     my $str;
   159     for (@ary) {
   160         my ($index, $path, $count, $avg, $med, $max, $min, @times) = @$_;
   161         $str .= sprintf "%3s %-26s\t", $index, $path;
   162         if ($count > 0) {
   163             $str .= sprintf "%6s %6s %6s %6s ", $avg, $med, $max, $min;
   164             foreach my $time (@times) {
   165                 $str .= sprintf "%6s ", $time;
   166             }
   167         }
   168         $str .= "\n";
   169     }
   170     return $str;
   171 }
   173 #
   174 # package internal helper functions
   175 #
   176 sub _num {
   177     my @array = ();
   178     for (@_) { push @array, $_ if /^[+-]?\d+\.?\d*$/o; }
   179     return @array;
   180 }
   182 sub _avg {
   183     my @array = _num(@_);
   184     return "NaN" unless scalar(@array);
   185     my $sum = 0;
   186     for (@array) { $sum += $_; }
   187     return $sum/scalar(@array);
   188 }
   190 sub _max {
   191     my @array = _num(@_);
   192     return "NaN" unless scalar(@array);
   193     my $max = $array[0];
   194     for (@array) { $max = ($max > $_) ? $max : $_; }
   195     return $max;
   196 }
   198 sub _min {
   199     my @array = _num(@_);
   200     return "NaN" unless scalar(@array);
   201     my $min = $array[0];
   202     for (@array) { $min = ($min < $_) ? $min : $_; }
   203     return $min;
   204 }
   206 # returns the floor(N/2) element of a sorted ascending array
   207 sub _med {
   208     my @array = _num(@_);
   209     return "NaN" unless scalar(@array);
   210     my $index = int((scalar(@array)-1)/2);
   211     @array = sort {$a <=> $b} @array;
   212     return $array[$index];
   213 }
   215 1; # return true
   217 ################################################################################
   218 #
   219 # [1] in looking at the test results, in almost all cases, the
   220 # round-trip time measured by the server logic and the client logic
   221 # would be almost the same value (which is what one would
   222 # expect). However, on occasion, one of the them would be "out of
   223 # whack", and inconsistent with the additional "layout" measure by the
   224 # client.
   225 #
   226 #    i.e., a set of numbers like these:
   227 #      c_part     c_intvl      s_intvl
   228 #      800      1003        997
   229 #      804      1007        1005
   230 #      801      1001        1325             <--
   231 #      803      1318        998              <--
   232 #      799      1002        1007
   233 #      ...
   234 #
   235 # which looks like the server side would stall in doing the accept or
   236 # in running the mod-perl handler (possibly a GC?). (The following
   237 # c_intvl would then be out of whack by a matching amount on the next
   238 # cycle).
   239 #
   240 # At any rate, since it was clear from comparing with the 'c_part'
   241 # measure, which of the times was bogus, I just use an arbitrary error
   242 # measure to determine when to toss out the "bad" value.
   243 #

mercurial