tools/page-loader/PageData.pm

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 #
michael@0 2 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 5 package PageData;
michael@0 6 use strict;
michael@0 7 use vars qw($MagicString $ClientJS); # defined at end of file
michael@0 8
michael@0 9 #
michael@0 10 # contains a set of URLs and other meta information about them
michael@0 11 #
sub new {
    # Constructor. May be invoked on the class name or on an existing
    # object; in the latter case the new object is blessed into that
    # object's class. The fresh object is filled in by _init() before
    # it is returned.
    my ($invocant) = @_;
    my $class = ref($invocant) || $invocant;

    my %fields = (
        ClientJS    => $ClientJS,
        MagicString => $MagicString,
        PageHash    => {},
        PageList    => [],
        Length      => undef,
        FileBase    => undef,
        HTTPBase    => undef,
    );

    my $self = bless { %fields }, $class;
    $self->_init();
    return $self;
}
michael@0 28
michael@0 29
michael@0 30 #
michael@0 31 # Parse a config file in the current directory for info.
michael@0 32 # All requests to the current cgi-bin path will use the same info;
michael@0 33 # to set up specialized lists, create a separate cgi-bin subdir
michael@0 34 #
sub _init {
    # Populate the object from "urllist.txt" in the current directory and
    # then build the by-name view via _createHashView().
    #
    # Recognized lines:
    #   HTTPBASE: <value>   stored in $self->{HTTPBase}
    #   FILEBASE: <value>   stored in $self->{FileBase}
    #   anything else       a whitespace-separated page entry (see below)
    # Blank or whitespace-only lines and lines whose first non-blank
    # character is '#' are skipped; a trailing " # comment" is stripped.
    #
    # Dies if the file cannot be opened, or if no page entry, no HTTPBASE
    # value or no FILEBASE value was read.
    my $self = shift;

    my $file = "urllist.txt";
    # Three-argument open on a lexical handle: the mode cannot be altered
    # by the file name and no package-global handle is left open.
    open(my $fh, '<', $file)
        or die "Can't open file $file: $!";

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        # Skip whitespace-only lines too (e.g. a lone "\r" from a CRLF
        # file); otherwise they would become entries with no name.
        next if $line =~ /^\s*$/;
        next if $line =~ /^\s*#/;
        $line =~ s/\s+#.*$//;
        # The lazy capture plus \s* keeps trailing blanks and "\r" out of
        # the stored base values.
        if ($line =~ /^HTTPBASE:\s+(.*?)\s*$/i) {
            $self->{HTTPBase} = $1;
        } elsif ($line =~ /^FILEBASE:\s+(.*?)\s*$/i) {
            $self->{FileBase} = $1;
        } else {
            #
            # each of the remaining lines are:
            #   (1) the subdirectory containing the content for this URL,
            #   (2) the name of the top-level document [optional, default='index.html']
            #   (3) mime type for this document [optional, default is text/html]
            #   (4) a character set for this document [optional, default is none]
            # e.g.,
            #   home.netscape.com
            #   www.mozilla.org index.html
            #   www.aol.com default.xml text/xml
            #   www.jp.aol.com index.html text/html Shift_JIS
            #
            # split ' ' ignores leading whitespace, so an indented entry
            # does not end up with an empty first field.
            my @ary = split(' ', $line);
            next unless @ary;
            $ary[1] ||= 'index.html';
            push @{$self->{PageList}}, { Name     => $ary[0],
                                         URL      => $ary[0] . '/' . $ary[1],
                                         MimeType => $ary[2] || "text/html",
                                         CharSet  => $ary[3] || ''
                                       };
        }
    }
    close($fh);

    # check that we have enough to go on
    die "Did not read any URLs" unless scalar(@{$self->{PageList}});
    die "Did not read a value for the http base" unless $self->{HTTPBase};
    die "Did not read a value for the file base" unless $self->{FileBase};

    $self->{Length} = scalar(@{$self->{PageList}});
    $self->_createHashView();

}
michael@0 83
michael@0 84
sub _createHashView {
    # Mirror the ordered PageList into PageHash so that a page can be
    # looked up by its Name; each entry records the page's position in
    # the list and its relative URL.
    my ($self) = @_;
    my $last = $self->lastidx;
    foreach my $pos (0 .. $last) {
        my $entry = $self->{PageList}[$pos];
        my %view = (
            Index => $pos,
            URL   => $entry->{URL},
        );
        $self->{PageHash}{ $entry->{Name} } = \%view;
    }
}
michael@0 97
michael@0 98
sub filebase {
    # FILEBASE value stored by _init
    my ($self) = @_;
    return $self->{FileBase};
}

sub httpbase {
    # HTTPBASE value stored by _init
    my ($self) = @_;
    return $self->{HTTPBase};
}

sub length {
    # number of page entries
    my ($self) = @_;
    return $self->{Length};
}

sub lastidx {
    # highest valid page index (Length - 1)
    my ($self) = @_;
    return $self->{Length} - 1;
}

sub magicString {
    # marker string captured from $MagicString at construction
    my ($self) = @_;
    return $self->{MagicString};
}

sub clientJS {
    # JS fragment captured from $ClientJS at construction
    my ($self) = @_;
    return $self->{ClientJS};
}
michael@0 105
michael@0 106
sub url {
    # Get the relative URL of a page, by index or by name.
    #
    # An all-digit argument is treated as an index: it is validated by
    # _checkIndex (which dies on an out-of-range value) and the URL is
    # read from PageList. Any other argument is treated as a page name
    # and looked up in PageHash; an unknown name yields undef.
    my ($self, $arg) = @_;
    if ($arg =~ /^\d+$/) {
        return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{URL} : "";
    }
    # Fetch the entry first: a chained $self->{PageHash}{$arg}{URL} read
    # would autovivify an empty entry for every unknown name.
    my $entry = $self->{PageHash}{$arg};
    return $entry ? $entry->{URL} : undef;
}
michael@0 117
michael@0 118
sub charset {
    # Get the charset for a page, by index.
    #
    # Dies if the argument is not an all-digit index; _checkIndex dies
    # if the index is out of range.
    my ($self, $arg) = @_;
    # The offending value is quoted on both sides in the message.
    die "'${arg}' is not a numeric index" unless $arg =~ /^\d+$/;
    return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{CharSet} : "";
}
michael@0 129
michael@0 130
sub mimetype {
    # Get the MIME type for a page, by index.
    #
    # Dies if the argument is not an all-digit index; _checkIndex dies
    # if the index is out of range.
    my ($self, $arg) = @_;
    # The offending value is quoted on both sides in the message.
    die "'${arg}' is not a numeric index" unless $arg =~ /^\d+$/;
    return $self->_checkIndex($arg) ? $self->{PageList}[$arg]{MimeType} : "";
}
michael@0 141
michael@0 142
sub name {
    # Get the name of a page. An all-digit argument is treated as an
    # index (validated by _checkIndex); anything else is assumed to be
    # a name already and is handed back unchanged.
    my ($self, $arg) = @_;
    return $arg unless $arg =~ /^\d+$/;
    if ($self->_checkIndex($arg)) {
        return $self->{PageList}[$arg]{Name};
    }
    return "";
}
michael@0 153
michael@0 154
sub index {
    # Get the list position of a page. An all-digit argument is assumed
    # to be an index already and is handed back unchanged (it is not
    # range-checked here); any other argument is looked up by name in
    # PageHash, and an unknown name yields undef.
    my ($self, $arg) = @_;
    return $arg if $arg =~ /^\d+$/;
    # Fetch the entry first: a chained $self->{PageHash}{$arg}{Index}
    # read would autovivify an empty entry for every unknown name.
    my $entry = $self->{PageHash}{$arg};
    return $entry ? $entry->{Index} : undef;
}
michael@0 165
michael@0 166
sub _checkIndex {
    # Validate a page index: it must be defined, consist only of digits,
    # and be smaller than the number of pages. Dies on a bad index,
    # returns 1 otherwise.
    my ($self, $idx) = @_;
    if (   !defined($idx)
        || $idx !~ /^\d+$/
        || $idx < 0
        || $idx >= $self->{Length})
    {
        die "Bogus index passed to PageData: $idx";
    }
    return 1;
}
michael@0 177
michael@0 178
#
# JS to insert in the static HTML pages to trigger client timing and reloading.
# The fragment lives in a non-interpolating here-document (<<'ENDOFJS'), so
# '$', '@' and backslashes in the JS are taken literally and need no escaping.
#
# JS globals have been intentionally "uglified" with 'moztest_', to avoid collision
# with existing content in the page
#
$MagicString = '<!-- MOZ_INSERT_CONTENT_HOOK -->';
$ClientJS = <<'ENDOFJS';

//<![CDATA[

function moztest_tokenizeQuery() {
  var query = {};
  var pairs = document.location.search.substring(1).split('&');
  for (var i=0; i < pairs.length; i++) {
    var pair = pairs[i].split('=');
    query[pair[0]] = unescape(pair[1]);
  }
  return query;
}

function moztest_setLocationHref(href, useReplace) {
  // false => "Location.href=url", not ".replace(url)"
  if (useReplace) {
    document.location.replace(href);
  } else {
    document.location.href = href;
  }
}

var g_moztest_Href;
function moztest_nextRequest(c_part) {
  function getValue(arg,def) {
    return !isNaN(arg) ? parseInt(Number(arg)) : def;
  }
  var q = moztest_tokenizeQuery();
  var index = getValue(q['index'], 0);
  var cycle = getValue(q['cycle'], 0);
  var maxcyc = getValue(q['maxcyc'], 1);
  var replace = getValue(q['replace'], 0);
  var nocache = getValue(q['nocache'], 0);
  var delay = getValue(q['delay'], 0);
  var timeout = getValue(q['timeout'], 30000);
  var c_ts = getValue(q['c_ts'], Number.NaN);

  // check for times
  var now = (new Date()).getTime();
  var c_intvl = now - c_ts;
  var c_ts = now + delay; // adjust for delay time

  // Now make the request ...
  g_moztest_Href = document.location.href.split('?')[0] +
    "?c_part=" + c_part +
    "&index=" + ++index + // increment the request index
    "&id=" + q['id'] +
    "&maxcyc=" + maxcyc +
    "&replace=" + replace +
    "&nocache=" + nocache +
    "&delay=" + delay +
    "&timeout=" + timeout +
    "&c_intvl=" + c_intvl +
    "&s_ts=" + g_moztest_ServerTime +
    "&c_ts=" + c_ts +
    "&content=" + g_moztest_Content;
  window.setTimeout("moztest_setLocationHref(g_moztest_Href,false);", delay);
  return true;
}

function moztest_onDocumentLoad() {
  var loadTime = (new Date()).getTime() - g_moztest_Start;
  window.clearTimeout(g_moztest_safetyTimer); // the onload has fired, clear the safety
  moztest_nextRequest(loadTime);
}

function moztest_safetyValve() {
  moztest_nextRequest(Number.NaN); // if the onload never fires
}

// normal processing is to calculate load time and fetch another URL
window.onload = moztest_onDocumentLoad;

//]]>

ENDOFJS
michael@0 265
michael@0 266 1; # return true from module

mercurial