The Tor Browser: tools/page-loader/PageData.pm@97036ab72558

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

1 #

     2 # This Source Code Form is subject to the terms of the Mozilla Public

     3 # License, v. 2.0. If a copy of the MPL was not distributed with this

     4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

     5 package PageData;

     6 use strict;

     7 use vars qw($MagicString $ClientJS); # defined at end of file

#
# PageData contains a set of URLs and other meta information about them.
#
# new() creates the object with empty page containers, copies in the
# package-level $ClientJS and $MagicString values, and then calls _init()
# to fill in the remaining fields. It may be invoked on the class name or
# on an existing instance, in which case the instance's class is reused.
#
sub new {
    my ($proto) = @_;
    my $class = ref($proto) || $proto;

    my %fields = (
        PageList    => [],
        PageHash    => {},
        MagicString => $MagicString,
        ClientJS    => $ClientJS,
        FileBase    => undef,
        HTTPBase    => undef,
        Length      => undef,
    );

    my $self = bless(\%fields, $class);
    $self->_init();
    return $self;
}

    #
    # Parse a config file in the current directory for info.
    # All requests to the current cgi-bin path will use the same info;
    # to set up specialized lists, create a separate cgi-bin subdir.
    #
    # Reads "urllist.txt" and fills in HTTPBase, FileBase, PageList and
    # Length, then builds the by-name view with _createHashView().
    #
    # Dies if the file cannot be opened, or if it yields no URLs, no
    # HTTPBASE value, or no FILEBASE value.
    #
    sub _init {
        my $self = shift;

        my $file = "urllist.txt";

        # Three-argument open on a lexical handle: the file name is never
        # interpreted as a mode, and the handle cannot collide with a
        # package-global FILE used elsewhere.
        open(my $fh, '<', $file)
            or die "Can't open file $file: $!";

        # Read into a lexical instead of $_ so the caller's $_ is untouched.
        while (my $line = <$fh>) {
            next if $line =~ /^\s*$/;   # empty or whitespace-only (incl. a bare CR)
            next if $line =~ /^\s*#/;   # whole-line comment
            $line =~ s/\s+#.*$//;       # strip a trailing comment

            if ($line =~ /^\s*HTTPBASE:\s+(.*)$/i) {
                # Drop trailing blanks / CR so the base is usable as-is.
                ($self->{HTTPBase} = $1) =~ s/\s+$//;
            } elsif ($line =~ /^\s*FILEBASE:\s+(.*)$/i) {
                ($self->{FileBase} = $1) =~ s/\s+$//;
            } else {
                #
                # each of the remaining lines contains:
                #   (1) the subdirectory containing the content for this URL,
                #   (2) the name of the top-level document [optional, default='index.html']
                #   (3) mime type for this document [optional, default is text/html]
                #   (4) a character set for this document [optional, default is none]
                # e.g.,
                #  home.netscape.com
                #  www.mozilla.org      index.html
                #  www.aol.com          default.xml      text/xml
                #  www.jp.aol.com       index.html       text/html   Shift_JIS
                #
                # split ' ' ignores leading whitespace, so an indented
                # entry does not produce an empty first field.
                my @ary = split(' ', $line);
                $ary[1] ||= 'index.html';
                push @{$self->{PageList}}, { Name    => $ary[0],
                                             URL     => $ary[0] . '/' . $ary[1],
                                             MimeType => $ary[2] || "text/html",
                                             CharSet => $ary[3] || ''
                                             };
            }
        }
        close($fh);

        # check that we have enough to go on
        die "Did not read any URLs" unless scalar(@{$self->{PageList}});
        die "Did not read a value for the http base" unless $self->{HTTPBase};
        die "Did not read a value for the file base" unless $self->{FileBase};

        $self->{Length}   = scalar(@{$self->{PageList}});
        $self->_createHashView();

    }

    #
    # Build the by-name view of the page list: for every index from 0 to
    # lastidx, store { Index, URL } in PageHash under the page's Name.
    # A later page with the same Name replaces the earlier entry.
    #
    sub _createHashView {
        my ($self) = @_;
        my $last = $self->lastidx;
        foreach my $pos (0 .. $last) {
            my $page = $self->{PageList}[$pos];
            my %view = (
                Index => $pos,
                URL   => $page->{URL},
            );
            $self->{PageHash}{ $page->{Name} } = \%view;
        }
    }

    #
    # Read-only accessors for the fields stored on the object.
    #

    # FileBase value stored on the object
    sub filebase {
        my ($self) = @_;
        return $self->{FileBase};
    }

    # HTTPBase value stored on the object
    sub httpbase {
        my ($self) = @_;
        return $self->{HTTPBase};
    }

    # stored Length value
    sub length {
        my ($self) = @_;
        return $self->{Length};
    }

    # Length minus one
    sub lastidx {
        my ($self) = @_;
        return $self->{Length} - 1;
    }

    # MagicString value stored on the object
    sub magicString {
        my ($self) = @_;
        return $self->{MagicString};
    }

    # ClientJS value stored on the object
    sub clientJS {
        my ($self) = @_;
        return $self->{ClientJS};
    }

   #
   # Get the relative url by index or by name.
   #
   # An all-digit argument is treated as an index and validated with
   # _checkIndex() before the PageList lookup. Anything else is treated
   # as a page name and looked up in PageHash; an unknown (or undefined)
   # name returns undef.
   #
   sub url {
       my $self = shift;
       my $arg  = shift;
       if (defined($arg) && $arg =~ /^\d+$/) {
           return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{URL} : "";
       }

       # Fetch the entry first instead of chaining {$arg}{URL}: the chained
       # form would create an empty PageHash entry for every unknown name.
       my $entry = defined($arg) ? $self->{PageHash}{$arg} : undef;
       return $entry ? $entry->{URL} : undef;
   }

   #
   # Get the charset for this URL, by index.
   #
   # The argument must be all digits; it is validated with _checkIndex()
   # before the PageList lookup. Any other argument (including undef)
   # dies with "'<arg>' is not a numeric index".
   #
   sub charset {
       my $self = shift;
       my $arg  = shift;
       if (defined($arg) && $arg =~ /^\d+$/) {
           return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{CharSet} : "";
       }

       # Quote the offending value on both sides; show undef as empty.
       my $shown = defined($arg) ? $arg : '';
       die "'$shown' is not a numeric index";
   }

   #
   # Get the mimetype for this URL, by index.
   #
   # The argument must be all digits; it is validated with _checkIndex()
   # before the PageList lookup. Any other argument (including undef)
   # dies with "'<arg>' is not a numeric index".
   #
   sub mimetype {
       my $self = shift;
       my $arg  = shift;
       if (defined($arg) && $arg =~ /^\d+$/) {
           return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{MimeType} : "";
       }

       # Quote the offending value on both sides; show undef as empty.
       my $shown = defined($arg) ? $arg : '';
       die "'$shown' is not a numeric index";
   }

   #
   # Get the page name by index. An argument that is not all digits is
   # taken to be a name already and is handed back unchanged; an all-digit
   # argument is validated with _checkIndex() before the PageList lookup.
   #
   sub name {
       my ($self, $arg) = @_;

       # Looked up the name using a name: nothing to resolve.
       return $arg unless $arg =~ /^\d+$/;

       return "" unless $self->_checkIndex($arg);
       return $self->{PageList}[$arg]{Name};
   }

   #
   # Get the page index by name.
   #
   # An all-digit argument is taken to be an index already and is
   # returned unchanged (it is not range-checked here). Any other
   # argument is looked up in PageHash; an unknown (or undefined) name
   # returns undef.
   #
   sub index {
       my $self = shift;
       my $arg = shift;
       if (defined($arg) && $arg =~ /^\d+$/) {
           # Looked up the index using an index: nothing to resolve.
           return $arg;
       }

       # Fetch the entry first instead of chaining {$arg}{Index}: the
       # chained form would create an empty PageHash entry for every
       # unknown name.
       my $entry = defined($arg) ? $self->{PageHash}{$arg} : undef;
       return $entry ? $entry->{Index} : undef;
   }

   #
   # Validate a page index. Returns 1 when the index is defined, consists
   # only of digits, and lies in the range 0 .. Length-1; otherwise dies
   # with "Bogus index passed to PageData: <idx>".
   #
   sub _checkIndex {
       my ($self, $idx) = @_;

       my $valid = defined($idx)
                && $idx =~ /^\d+$/
                && $idx >= 0
                && $idx < $self->{Length};

       die "Bogus index passed to PageData: $idx" if !$valid;
       return 1;
   }

   #
   # JS to insert in the static HTML pages to trigger client timing and reloading.
   # The fragment is held in a non-interpolating heredoc (<<'ENDOFJS'), so '$', '@'
   # and backslashes are stored verbatim and must NOT be escaped; perl never
   # interprets anything inside it.
   #
   # JS globals have been intentionally "uglified" with 'moztest_', to avoid collision
   # with existing content in the page
   #
   # The fragment reads g_moztest_ServerTime, g_moztest_Content, g_moztest_Start and
   # g_moztest_safetyTimer but does not define them; presumably they are emitted by
   # whatever inserts this fragment into the page -- TODO confirm against the caller.
   #
   $MagicString = '<!-- MOZ_INSERT_CONTENT_HOOK -->';
   $ClientJS    = <<'ENDOFJS';

//<![CDATA[

function moztest_tokenizeQuery() {
  var query = {};
  var pairs = document.location.search.substring(1).split('&');
  for (var i=0; i < pairs.length; i++) {
    var pair = pairs[i].split('=');
    query[pair[0]] = unescape(pair[1]);
  }
  return query;
}

function moztest_setLocationHref(href, useReplace) {
    // false => "Location.href=url", not ".replace(url)"
    if (useReplace) {
        document.location.replace(href);
    } else {
        document.location.href = href;
    }
}

var g_moztest_Href;
function moztest_nextRequest(c_part) {
    function getValue(arg,def) {
        return !isNaN(arg) ? parseInt(Number(arg)) : def;
    }
    var q = moztest_tokenizeQuery();
    var index    = getValue(q['index'],   0);
    var cycle    = getValue(q['cycle'],   0);
    var maxcyc   = getValue(q['maxcyc'],  1);
    var replace  = getValue(q['replace'], 0);
    var nocache  = getValue(q['nocache'], 0);
    var delay    = getValue(q['delay'],   0);
    var timeout  = getValue(q['timeout'], 30000);
    var c_ts     = getValue(q['c_ts'],    Number.NaN);

    // check for times
    var now      = (new Date()).getTime();
    var c_intvl  = now - c_ts;
    var c_ts     = now + delay; // adjust for delay time

    // Now make the request ...
    g_moztest_Href = document.location.href.split('?')[0] +
        "?c_part="  + c_part +
        "&index="   + ++index +   // increment the request index
        "&id="      + q['id'] +
        "&maxcyc="  + maxcyc +
        "&replace=" + replace +
        "&nocache=" + nocache +
        "&delay="   + delay +
        "&timeout=" + timeout +
        "&c_intvl=" + c_intvl +
        "&s_ts="    + g_moztest_ServerTime +
        "&c_ts="    + c_ts +
        "&content=" + g_moztest_Content;
    window.setTimeout("moztest_setLocationHref(g_moztest_Href,false);", delay);
    return true;
}

function moztest_onDocumentLoad() {
  var loadTime = (new Date()).getTime() - g_moztest_Start;
  window.clearTimeout(g_moztest_safetyTimer); // the onload has fired, clear the safety
  moztest_nextRequest(loadTime);
}

function moztest_safetyValve() {
  moztest_nextRequest(Number.NaN);         // if the onload never fires
}

// normal processing is to calculate load time and fetch another URL
window.onload = moztest_onDocumentLoad;

//]]>

ENDOFJS

   1; # return true from module

The Tor Browser / file revision

tools/page-loader/PageData.pm@97036ab72558

tools/page-loader/PageData.pm