The Tor Browser: tools/footprint/leak-gauge.pl@97036ab72558 (annotated)

tools/footprint/leak-gauge.pl@97036ab72558 (annotated)

tools/footprint/leak-gauge.pl

Tue, 06 Jan 2015 21:39:09 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Tue, 06 Jan 2015 21:39:09 +0100
branch: TOR_BUG_9701
changeset 8: 97036ab72558
permissions: -rwxr-xr-x

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

 #!/usr/bin/perl -w
 # vim:sw=4:ts=4:et:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 # $Id: leak-gauge.pl,v 1.8 2008/02/08 19:55:03 dbaron%dbaron.org Exp $
 # This script is designed to help testers isolate and simplify testcases
 # for many classes of leaks (those that involve large graphs of core
 # data structures) in Mozilla-based browsers.  It is designed to print
 # information about what has leaked by processing a log taken while
 # running the browser.  Such a log can be taken over a long session of
 # normal browsing and then the log can be processed to find sites that
 # leak.  Once a site is known to leak, the logging can then be repeated
 # to figure out under what conditions the leak occurs.
 #
 # The way to create this log is to set the environment variables:
 #   NSPR_LOG_MODULES=DOMLeak:5,DocumentLeak:5,nsDocShellLeak:5,NodeInfoManagerLeak:5
 #   NSPR_LOG_FILE=nspr.log     (or any other filename of your choice)
 # in your shell and then run the program.
 # * In a Windows command prompt, set environment variables with
 #     set VAR=value
 # * In an sh-based shell such as bash, set environment variables with
 #     export VAR=value
 # * In a csh-based shell such as tcsh, set environment variables with
 #     setenv VAR value
 #
 # Then, after you have exited the browser, run this perl script over the
 # log.  Either of the following commands should work:
 #   perl ./path/to/leak-gauge.pl nspr.log
 #   cat nspr.log | perl ./path/to/leak-gauge.pl
 # and it will tell you which of certain core objects leaked and the URLs
 # associated with those objects.
 # Nobody said I'm not allowed to write my own object system in perl.  No
 # classes here.  Just objects and methods.
 sub call {
     my $func = shift;
     my $obj = shift;
     my $funcref = ${$obj}{$func};
     &$funcref($obj, @_);
 }
 # A hash of objects (keyed by the first word of the line in the log)
 # that have two public methods, handle_line and dump (to be called using
 # call, above), along with any private data they need.
 my $handlers = {
     "DOMWINDOW" => {
         count => 0,
         windows => {},
         handle_line => sub($$) {
             my ($self, $line) = @_;
             my $windows = ${$self}{windows};
             if ($line =~ /^([0-9a-f]*) (\S*)/) {
                 my ($addr, $verb, $rest) = ($1, $2, $');
                 if ($verb eq "created") {
                     $rest =~ / outer=([0-9a-f]*)$/ || die "outer expected";
                     my $outer = $1;
                     ${$windows}{$addr} = { outer => $1 };
                     ++${$self}{count};
                 } elsif ($verb eq "destroyed") {
                     delete ${$windows}{$addr};
                 } elsif ($verb eq "SetNewDocument") {
                     $rest =~ /^ (.*)$/ || die "URI expected";
                     my $uri = ($1);
                     ${${$windows}{$addr}}{$uri} = 1;
                 }
             }
         },
         dump => sub ($) {
             my ($self) = @_;
             my $windows = ${$self}{windows};
             foreach my $addr (keys(%{$windows})) {
                 my $winobj = ${$windows}{$addr};
                 my $outer = delete ${$winobj}{outer};
                 print "Leaked " . ($outer eq "0" ? "outer" : "inner") .
                       " window $addr " .
                       ($outer eq "0" ? "" : "(outer $outer) ") .
                       "at address $addr.\n";
                 foreach my $uri (keys(%{$winobj})) {
                     print " ... with URI \"$uri\".\n";
                 }
             }
         },
         summary => sub($) {
             my ($self) = @_;
             my $windows = ${$self}{windows};
             print 'Leaked ' . keys(%{$windows}) . ' out of ' .
                   ${$self}{count} . " DOM Windows\n";
         }
     },
     "DOCUMENT" => {
         count => 0,
         docs => {},
         handle_line => sub($$) {
             my ($self, $line) = @_;
             # This doesn't work; I don't have time to figure out why not.
             # my $docs = ${$self}{docs};
             my $docs = ${$handlers}{"DOCUMENT"}{docs};
             if ($line =~ /^([0-9a-f]*) (\S*)/) {
                 my ($addr, $verb, $rest) = ($1, $2, $');
                 if ($verb eq "created") {
                     ${$docs}{$addr} = {};
                     ++${$self}{count};
                 } elsif ($verb eq "destroyed") {
                     delete ${$docs}{$addr};
                 } elsif ($verb eq "ResetToURI" ||
                          $verb eq "StartDocumentLoad") {
                     $rest =~ /^ (.*)$/ || die "URI expected";
                     my $uri = $1;
                     my $doc_info = ${$docs}{$addr};
                     ${$doc_info}{$uri} = 1;
                     if (exists(${$doc_info}{"nim"})) {
                         ${$doc_info}{"nim"}{$uri} = 1;
                     }
                 }
             }
         },
         dump => sub ($) {
             my ($self) = @_;
             my $docs = ${$self}{docs};
             foreach my $addr (keys(%{$docs})) {
                 print "Leaked document at address $addr.\n";
                 foreach my $uri (keys(%{${$docs}{$addr}})) {
                     print " ... with URI \"$uri\".\n" unless $uri eq "nim";
                 }
             }
         },
         summary => sub($) {
             my ($self) = @_;
             my $docs = ${$self}{docs};
             print 'Leaked ' . keys(%{$docs}) . ' out of ' .
                   ${$self}{count} . " documents\n";
         }
     },
     "DOCSHELL" => {
         count => 0,
         shells => {},
         handle_line => sub($$) {
             my ($self, $line) = @_;
             my $shells = ${$self}{shells};
             if ($line =~ /^([0-9a-f]*) (\S*)/) {
                 my ($addr, $verb, $rest) = ($1, $2, $');
                 if ($verb eq "created") {
                     ${$shells}{$addr} = {};
                     ++${$self}{count};
                 } elsif ($verb eq "destroyed") {
                     delete ${$shells}{$addr};
                 } elsif ($verb eq "InternalLoad" ||
                          $verb eq "SetCurrentURI") {
                     $rest =~ /^ (.*)$/ || die "URI expected";
                     my $uri = $1;
                     ${${$shells}{$addr}}{$uri} = 1;
                 }
             }
         },
         dump => sub ($) {
             my ($self) = @_;
             my $shells = ${$self}{shells};
             foreach my $addr (keys(%{$shells})) {
                 print "Leaked docshell at address $addr.\n";
                 foreach my $uri (keys(%{${$shells}{$addr}})) {
                     print " ... which loaded URI \"$uri\".\n";
                 }
             }
         },
         summary => sub($) {
             my ($self) = @_;
             my $shells = ${$self}{shells};
             print 'Leaked ' . keys(%{$shells}) . ' out of ' .
                   ${$self}{count} . " docshells\n";
         }
     },
     "NODEINFOMANAGER" => {
         count => 0,
         nims => {},
         handle_line => sub($$) {
             my ($self, $line) = @_;
             my $nims = ${$self}{nims};
             if ($line =~ /^([0-9a-f]*) (\S*)/) {
                 my ($addr, $verb, $rest) = ($1, $2, $');
                 if ($verb eq "created") {
                     ${$nims}{$addr} = {};
                     ++${$self}{count};
                 } elsif ($verb eq "destroyed") {
                     delete ${$nims}{$addr};
                 } elsif ($verb eq "Init") {
                     $rest =~ /^ document=(.*)$/ ||
                         die "document pointer expected";
                     my $doc = $1;
                     if ($doc ne "0") {
                         my $nim_info = ${$nims}{$addr};
                         my $doc_info = ${$handlers}{"DOCUMENT"}{docs}{$doc};
                         foreach my $uri (keys(%{$doc_info})) {
                             ${$nim_info}{$uri} = 1;
                         }
                         ${$doc_info}{"nim"} = $nim_info;
                     }
                 }
             }
         },
         dump => sub ($) {
             my ($self) = @_;
             my $nims = ${$self}{nims};
             foreach my $addr (keys(%{$nims})) {
                 print "Leaked content nodes associated with node info manager at address $addr.\n";
                 foreach my $uri (keys(%{${$nims}{$addr}})) {
                     print " ... with document URI \"$uri\".\n";
                 }
             }
         },
         summary => sub($) {
             my ($self) = @_;
             my $nims = ${$self}{nims};
             print 'Leaked content nodes within ' . keys(%{$nims}) . ' out of ' .
                   ${$self}{count} . " documents\n";
         }
     }
 };
 while (<>) {
     # strip off initial "-", thread id, and thread pointer; separate
     # first word and rest
     if (/^\-?[0-9]*\[[0-9a-f]*\]: (\S*) ([^\n\r]*)[\n\r]*$/) {
         my ($handler, $data) = ($1, $2);
         if (defined(${$handlers}{$handler})) {
             call("handle_line", ${$handlers}{$handler}, $data);
         }
     }
 }
 foreach my $key (keys(%{$handlers})) {
     call("dump", ${$handlers}{$key});
 }
 print "Summary:\n";
 foreach my $key (keys(%{$handlers})) {
     call("summary", ${$handlers}{$key});
 }

The Tor Browser / annotate

tools/footprint/leak-gauge.pl@97036ab72558 (annotated)

tools/footprint/leak-gauge.pl