tools/page-loader/PageData.pm

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirement documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 #
     2 # This Source Code Form is subject to the terms of the Mozilla Public
     3 # License, v. 2.0. If a copy of the MPL was not distributed with this
     4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 package PageData;
     6 use strict;
     7 use vars qw($MagicString $ClientJS); # defined at end of file
     9 #
    10 # contains a set of URLs and other meta information about them
    11 #
    12 sub new {
    13     my $proto = shift;
    14     my $class = ref($proto) || $proto;
    15     my $self  = {
    16         ClientJS    => $ClientJS,
    17         MagicString => $MagicString,
    18         PageHash    => {},
    19         PageList    => [],
    20         Length      => undef,
    21         FileBase    => undef,
    22         HTTPBase    => undef
    23     };
    24     bless ($self, $class);
    25     $self->_init();
    26     return $self;
    27 }
    30 #
    31 # Parse a config file in the current directory for info.
    32 # All requests to the current cgi-bin path will use the same info;
    33 # to set up specialized lists, create a separate cgi-bin subdir
    34 #
    35 sub _init {
    37     my $self = shift;
    39     my $file = "urllist.txt";
    40     open(FILE, "< $file") ||
    41          die "Can't open file $file: $!";
    43     while (<FILE>) {
    44         next if /^$/;
    45         next if /^#|^\s+#/;
    46         s/\s+#.*$//;
    47         if (/^HTTPBASE:\s+(.*)$/i) {
    48             $self->{HTTPBase} = $1;
    49         } elsif (/^FILEBASE:\s+(.*)$/i) {
    50             $self->{FileBase} = $1;
    51         } else { 
    52             # 
    53             # each of the remaining lines are: 
    54             #   (1) the subdirectory containing the content for this URL,
    55             #   (2) the name of the top-level document [optional, default='index.html']
    56             #   (3) mime type for this document [optional, default is text/html]
    57             #   (4) a character set for this document [optional, default is none]
    58             # e.g., 
    59             #  home.netscape.com
    60             #  www.mozilla.org      index.html
    61             #  www.aol.com          default.xml      text/xml
    62             #  www.jp.aol.com       index.html       text/html   Shift_JIS
    63             #
    64             my @ary = split(/\s+/, $_);
    65             $ary[1] ||= 'index.html';
    66             push @{$self->{PageList}}, { Name    => $ary[0], 
    67                                          URL     => $ary[0] . '/' . $ary[1],
    68                                          MimeType => $ary[2] || "text/html",
    69                                          CharSet => $ary[3] || ''
    70                                          };
    71         }
    72     }
    74     # check that we have enough to go on
    75     die "Did not read any URLs" unless scalar(@{$self->{PageList}});
    76     die "Did not read a value for the http base" unless $self->{HTTPBase};
    77     die "Did not read a value for the file base" unless $self->{FileBase};
    79     $self->{Length}   = scalar(@{$self->{PageList}});
    80     $self->_createHashView();
    82 }
    85 sub _createHashView {
    86     # repackages the array, so it can be referenced by name
    87     my $self = shift;
    88     for my $i (0..$self->lastidx) {
    89         my $hash = $self->{PageList}[$i];
    90         #warn $i, " ", $hash, " ", %$hash;
    91         $self->{PageHash}{$hash->{Name}} = {
    92             Index => $i,
    93             URL   => $hash->{URL},
    94         };
    95     }
    96 }
    99 sub filebase    { my $self = shift; return $self->{FileBase}; }
   100 sub httpbase    { my $self = shift; return $self->{HTTPBase}; }
   101 sub length      { my $self = shift; return $self->{Length}; }
   102 sub lastidx     { my $self = shift; return $self->{Length} - 1; }
   103 sub magicString { my $self = shift; return $self->{MagicString}; }
   104 sub clientJS    { my $self = shift; return $self->{ClientJS}; }
   107 sub url {
   108     # get the relative url by index or by name
   109     my $self = shift;
   110     my $arg  = shift;
   111     if ($arg =~ /^\d+$/) {
   112         return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{URL} : "";
   113     } else {
   114         return $self->{PageHash}{$arg}{URL};
   115     }
   116 }
   119 sub charset {
   120     # get the charset for this URL, by index
   121     my $self = shift;
   122     my $arg  = shift;
   123     if ($arg =~ /^\d+$/) {
   124         return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{CharSet} : "";
   125     } else {
   126         die "$arg' is not a numeric index";
   127     }
   128 }
   131 sub mimetype {
   132     # get the mimetype for this URL, by index
   133     my $self = shift;
   134     my $arg  = shift;
   135     if ($arg =~ /^\d+$/) {
   136         return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{MimeType} : "";
   137     } else {
   138         die "$arg' is not a numeric index";
   139     }
   140 }
   143 sub name {
   144     my $self = shift;
   145     my $arg  = shift;
   146     if ($arg =~ /^\d+$/) {
   147         return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{Name} : "";
   148     } else {
   149         #warn "You looked up the name using a name.";
   150         return $arg;
   151     }
   152 }
   155 sub index {
   156     my $self = shift;
   157     my $arg = shift;
   158     if ($arg =~ /^\d+$/) {
   159         #warn "You looked up the index using an index.";
   160         return $arg;
   161     } else {
   162         return $self->{PageHash}{$arg}{Index};
   163     }
   164 }
   167 sub _checkIndex {
   168     my $self = shift;
   169     my $idx = shift;
   170     die "Bogus index passed to PageData: $idx"
   171         unless defined($idx) &&
   172                $idx =~ /^\d+$/ &&
   173                $idx >= 0 &&
   174                $idx < $self->{Length};
   175     return 1;
   176 }
   179 #
   180 # JS to insert in the static HTML pages to trigger client timimg and reloading.
   181 # You must escape any '$', '@', '\n' contained in the JS code fragment. Otherwise,
   182 # perl will attempt to interpret them, and silently convert " $foo " to "  ".
   183 #
   184 # JS globals have been intentionally "uglified" with 'moztest_', to avoid collision
   185 # with existing content in the page
   186 #
   187 $MagicString = '<!-- MOZ_INSERT_CONTENT_HOOK -->';
   188 $ClientJS    =<<"ENDOFJS";
   190 //<![CDATA[
   192 function moztest_tokenizeQuery() {
   193   var query = {};
   194   var pairs = document.location.search.substring(1).split('&');
   195   for (var i=0; i < pairs.length; i++) {
   196     var pair = pairs[i].split('=');
   197     query[pair[0]] = unescape(pair[1]);
   198   }
   199   return query;
   200 }
   202 function moztest_setLocationHref(href, useReplace) {
   203     // false => "Location.href=url", not ".replace(url)"
   204     if (useReplace) {
   205         document.location.replace(href);
   206     } else {
   207         document.location.href = href;
   208     }
   209 }
   211 var g_moztest_Href;
   212 function moztest_nextRequest(c_part) {
   213     function getValue(arg,def) {
   214         return !isNaN(arg) ? parseInt(Number(arg)) : def;
   215     }
   216     var q = moztest_tokenizeQuery();
   217     var index    = getValue(q['index'],   0);
   218     var cycle    = getValue(q['cycle'],   0);
   219     var maxcyc   = getValue(q['maxcyc'],  1);
   220     var replace  = getValue(q['replace'], 0);
   221     var nocache  = getValue(q['nocache'], 0);
   222     var delay    = getValue(q['delay'],   0);
   223     var timeout  = getValue(q['timeout'], 30000);
   224     var c_ts     = getValue(q['c_ts'],    Number.NaN);
   226     // check for times
   227     var now      = (new Date()).getTime();
   228     var c_intvl  = now - c_ts;
   229     var c_ts     = now + delay; // adjust for delay time
   231     // Now make the request ...
   232     g_moztest_Href = document.location.href.split('?')[0] +
   233         "?c_part="  + c_part +
   234         "&index="   + ++index +   // increment the request index
   235         "&id="      + q['id'] +
   236         "&maxcyc="  + maxcyc +
   237         "&replace=" + replace +
   238         "&nocache=" + nocache +
   239         "&delay="   + delay +
   240         "&timeout=" + timeout +
   241         "&c_intvl=" + c_intvl +
   242         "&s_ts="    + g_moztest_ServerTime +
   243         "&c_ts="    + c_ts +
   244         "&content=" + g_moztest_Content;
   245     window.setTimeout("moztest_setLocationHref(g_moztest_Href,false);", delay);
   246     return true;
   247 }
   249 function moztest_onDocumentLoad() {
   250   var loadTime = (new Date()).getTime() - g_moztest_Start;
   251   window.clearTimeout(g_moztest_safetyTimer); // the onload has fired, clear the safety
   252   moztest_nextRequest(loadTime);
   253 }
   255 function moztest_safetyValve() {
   256   moztest_nextRequest(Number.NaN);         // if the onload never fires
   257 }
   259 // normal processing is to calculate load time and fetch another URL
   260 window.onload = moztest_onDocumentLoad;
   262 //]]>
   264 ENDOFJS
   266 1; # return true from module

mercurial