The Tor Browser: tools/trace-malloc/leak-soup.pl@97036ab72558

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 #!/usr/bin/perl -w

2 #

     3 # This Source Code Form is subject to the terms of the Mozilla Public

     4 # License, v. 2.0. If a copy of the MPL was not distributed with this

     5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

     7 # A perl version of Patrick Beard's ``Leak Soup'', which processes the

     8 # stack crawls from the Boehm GC into a graph.

    10 use 5.004;

    11 use strict;

    12 use Getopt::Long;

    13 use FileHandle;

    14 use IPC::Open2;

    16 # Collect program options

    17 $::opt_help = 0;

    18 $::opt_detail = 0;

    19 $::opt_fragment = 1.0; # Default to no fragment analysis

    20 $::opt_nostacks = 0;

    21 $::opt_nochildstacks = 0;

    22 $::opt_depth = 9999;

    23 $::opt_noentrained = 0;

    24 $::opt_noslop = 0;

    25 $::opt_showtype = -1; # default to listing all types

    26 $::opt_stackrefine = "C";

    27 @::opt_stackretype = ();

    28 @::opt_stackskipclass = ();

    29 @::opt_stackskipfunc = ();

    30 @::opt_typedivide = ();

    32 GetOptions("help", "detail", "format=s", "fragment=f", "nostacks",

    33 	   "nochildstacks", "depth=i", "noentrained", "noslop", "showtype=i",

    34 	   "stackrefine=s", "stackretype=s@", "stackskipclass=s@", "stackskipfunc=s@",

    35 	   "typedivide=s@"

    36 	   );

    38 if ($::opt_help) {

    39     die "usage: leak-soup.pl [options] <leakfile>

    40   --help                 Display this message

    41   --detail               Provide details of memory sweeping from child to parents

    42   --fragment=ratio       Histogram bucket ratio for fragmentation analysis

    43 #  --nostacks            Do not compute stack traces

    44 #  --nochildstacks       Do not compute stack traces for entrained objects

    45 #  --depth=<max>         Only compute stack traces to depth of <max>

    46 #  --noentrained         Do not compute amount of memory entrained by root objects

    47   --noslop               Don't ignore low bits when searching for pointers

    48   --showtype=<i>         Show memory usage histogram for most-significant <i> types

    49   --stackrefine={F|C}    During stack based refinement, use 'F'ull name name or just 'C'lass

    50   --stackretype=type     Use allocation stack to refine vague types like void*

    51   --stackskipclass=class When refining types, ignore stack frames from 'class'

    52   --stackskipfunc=func   When refining types, ignore stack frames for 'func'

    53   --typedivide=type      Subdivide 'type' based on objects pointing to each instance

    54 ";

    55 }

    57 # This is the table that keeps a graph of objects. It's indexed by the

    58 # object's address (as an integer), and refers to a simple hash that

    59 # has information about the object's type, size, slots, and allocation

    60 # stack.

    61 %::Objects = %{0};

    63 # This will be a list of keys to (addresses in) Objects, that is sorted

    64 # It gets used to evaluate overlaps, calculate fragmentation, and chase

    65 # parent->child (interior) pointers.

    66 @::SortedAddresses = [];

    68 # This is the table that keeps track of memory usage on a per-type basis.

    69 # It is indexed by the type name (string), and keeps a tally of the

    70 # total number of such objects, and the memory usage of such objects.

    71 %::Types = %{0};

    72 $::TotalSize = 0; # sum of sizes of all objects included $::Types{}

    74 # This is an array of leaf node addresses.  A leaf node has no children

    75 # with memory allocations. We traverse them sweeping memory

    76 # tallies into parents.  Note that after all children have

    77 # been swept into a parent, that parent may also become a leaf node.

    78 @::Leafs = @{0};

    83 #----------------------------------------------------------------------

    84 #

    85 # Decode arguments to override default values for doing call-stack-based

    86 # refinement of typename based on contents of the stack at allocation time.

    87 #

    89 # List the types that we need to refine (if any) based on allocation stack

    90 $::VagueType = {

    91     'void*' => 1,

    92 };

    94 # With regard to the stack, ignore stack frames in the following

    95 # overly vague classes.

    96 $::VagueClasses = {

    97 #    'nsStr' => 1,

    98     'nsVoidArray' => 1,

    99 };

   101 # With regard to stack, ignore stack frames with the following vague

   102 # function names

   103 $::VagueFunctions = {

   104     'PL_ArenaAllocate' => 1,

   105     'PL_HashTableFinalize(PLHashTable *)' => 1,

   106     'PL_HashTableInit__FP11PLHashTableUiPFPCv_UiPFPCvPCv_iT3PC14PLHashAllocOpsPv' => 1,

   107     'PL_HashTableRawAdd' => 1,

   108     '__builtin_vec_new' => 1,

   109     '_init' => 1,

   110     'il_get_container(_IL_GroupContext *, ImgCachePolicy, char const *, _NI_IRGB *, IL_DitherMode, int, int, int)' => 1,

   111     'nsCStringKey::Clone(void) const' => 1,

   112     'nsCppSharedAllocator<unsigned short>::allocate(unsigned int, void const *)' => 1,

   113     'nsHashtable::Put(nsHashKey *, void *)' => 1,

   114     'nsHashtable::nsHashtable(unsigned int, int)' => 1,

   115     'nsMemory::Alloc(unsigned int)' => 1,

   116     'nsMemoryImpl::Alloc(unsigned int)' => 1,

   117 };

   119 sub init_stack_based_type_refinement() {

   120     # Move across stackretype options, or use default values

   121     if ($#::opt_stackretype < 0) {

   122 	print "Default --stackretype options will be used (since none were specified)\n";

   123 	print "  use --stackretype='nothing' to disable re-typing activity\n";

   124     } else {

   125 	foreach my $type (keys %{$::VagueType}) {

   126 	    delete ($::VagueType->{$type});

   127 	}

   128 	if ($#::opt_stackretype == 0 && $::opt_stackretype[0] eq 'nothing') {

   129 	    print "Types will not be refined based on call stack\n";

   130 	} else {

   131 	    foreach my $type (@::opt_stackretype) {

   132 		$::VagueType->{$type} = 1;

   133 	    }

   134 	}

   135     }

   138     if (keys %{$::VagueType}) {

   139 	print "The following type(s) will be refined based on call stacks:\n";

   140 	foreach my $type (sort keys %{$::VagueType}) {

   141 	    print "     $type\n";

   142 	}

   143 	print "Equivalent command line argument(s):\n";

   144 	foreach my $type (sort keys %{$::VagueType}) {

   145 	    print " --stackretype='$type'";

   146 	}

   147 	print "\n\n";

   149 	if ($#::opt_stackskipclass < 0) {

   150 	    print "Default --stackskipclass options will be used (since none were specified)\n";

   151 	    print "  use --stackskipclass='nothing' to disable skipping stack frames based on class names\n";

   152 	} else {

   153 	    foreach my $type (keys %{$::VagueClasses}) {

   154 		delete ($::VagueClasses->{$type});

   155 	    }

   156 	    if ($#::opt_stackskipclass == 0 && $::opt_stackskipclass[0] eq 'nothing') {

   157 		print "Types will not be refined based on call stack\n";

   158 	    } else {

   159 		foreach my $type (@::opt_stackskipclass) {

   160 		    $::VagueClasses->{$type} = 1;

   161 		}

   162 	    }

   163 	}

   165 	if (keys %{$::VagueClasses}) {

   166 	    print "Stack frames from the following class(es) will not be used to refine types:\n";

   167 	    foreach my $class (sort keys %{$::VagueClasses}) {

   168 		print "     $class\n";

   169 	    }

   170 	    print "Equivalent command line argument(s):\n";

   171 	    foreach my $class (sort keys %{$::VagueClasses}) {

   172 		print " --stackskipclass='$class'";

   173 	    }

   174 	    print "\n\n";

   175 	}

   178 	if ($#::opt_stackskipfunc < 0) {

   179 	    print "Default --stackskipfunc options will be used (since none were specified)\n";

   180 	    print "  use --stackskipfunc='nothing' to disable skipping stack frames based on function names\n";

   181 	} else {

   182 	    foreach my $type (keys %{$::VagueFunctions}) {

   183 		delete ($::VagueFunctions->{$type});

   184 	    }

   185 	    if ($#::opt_stackskipfunc == 0 && $::opt_stackskipfunc[0] eq 'nothing') {

   186 		print "Types will not be refined based on call stack\n";

   187 	    } else {

   188 		foreach my $type (@::opt_stackskipfunc) {

   189 		    $::VagueFunctions->{$type} = 1;

   190 		}

   191 	    }

   192 	}

   194 	if (keys %{$::VagueFunctions}) {

   195 	    print "Stack frames from the following function(s) will not be used to refine types:\n";

   196 	    foreach my $func (sort keys %{$::VagueFunctions}) {

   197 		print "     $func\n";

   198 	    }

   199 	    print "Equivalent command line argument(s):\n";

   200 	    foreach my $func (sort keys %{$::VagueFunctions}) {

   201 		print " --stackskipfunc='$func'";

   202 	    }

   203 	    print "\n\n";

   204 	}

   205     }

   206 }

   209 #----------------------------------------------------------------------

   210 #

   211 # Read in the output from the Boehm GC or Trace-malloc.

   212 #

   213 sub read_boehm() {

   214   OBJECT: while (<>) {

   215       # e.g., 0x0832FBD0 <void*> (80)

   216       next OBJECT unless /^0x(\S+) <(.*)> \((\d+)\)/;

   217       my ($addr, $type, $size) = (hex $1, $2, $3);

   219       my $object = $::Objects{$addr};

   220       if (! $object) {

   221           # Found a new object entry. Record its type and size

   222           $::Objects{$addr} =

   223               $object =

   224               { 'type' => $type, 'size' => $size };

   225       } else {

   226 	  print "Duplicate address $addr contains $object->{'type'} and $type\n";

   227 	  $object->{'dup_addr_count'}++;

   228       }

   230       # Record the object's slots

   231       my @slots;

   233     SLOT: while (<>) {

   234         # e.g.,      0x00000000

   235         last SLOT unless /^\t0x(\S+)/;

   236         my $value = hex $1;

   238         # Ignore low bits, unless they've specified --noslop

   239         $value &= ~0x7 unless $::opt_noslop;

   241         $slots[$#slots + 1] = $value;

   242     }

   244       $object->{'slots'} = \@slots;

   246       if (@::opt_stackretype && (defined $::VagueType->{$type})) {

   247 	  # Change the value of type of the object based on stack

   248 	  # if we can find an interesting calling function

   249         VAGUEFRAME: while (<>) {

   250             # e.g., _dl_debug_message[/lib/ld-linux.so.2 +0x0000B858]

   251             last VAGUEFRAMEFRAME unless /^(.*)\[(.*) \+0x(\S+)\]$/;

   252             my ($func, $lib, $off) = ($1, $2, hex $3);

   253             chomp;

   255 	    my ($class,,$fname) = split(/:/, $func);

   256 	    next VAGUEFRAME if (defined $::VagueFunctions->{$func} ||

   257 				defined $::VagueClasses->{$class});

   259 	    # Refine typename and exit stack scan

   260 	    $object->{'type'} = $type . ":" .

   261 		(('C' eq $::opt_stackrefine) ?

   262 		 $class :

   263 		 $func);

   264 	    last VAGUEFRAME;

   265 	}

   266       } else {

   267 	  # Save all stack info if requested

   268 	  if (! $::opt_nostacks) {

   269 	      # Record the stack by which the object was allocated

   270 	      my @stack;

   272 	    FRAME: while (<>) {

   273 		# e.g., _dl_debug_message[/lib/ld-linux.so.2 +0x0000B858]

   274 		last FRAME unless /^(.*)\[(.*) \+0x(\S+)\]$/;

   275 		my ($func, $lib, $off) = ($1, $2, hex $3);

   276 		chomp;

   278 		$stack[$#stack + 1] = $_;

   279 	    }

   281 	      $object->{'stack'} = \@stack;

   282 	  }

   283       }

   285       # Gotta check EOF explicitly...

   286       last OBJECT if eof;

   287   }

   288 }

   291 #----------------------------------------------------------------------

   292 #

   293 # Read input

   294 #

   295 init_stack_based_type_refinement();

   296 read_boehm;

   300 #----------------------------------------------------------------------

   301 #

   302 # Do basic initialization of the type hash table.  Accumulate

   303 # total counts, and basic memory usage (not including children)

   304 sub load_type_table() {

   305     # Reset global counter and hash table

   306     $::TotalSize = 0;

   307     %::Types = %{0};

   309     OBJECT: foreach my $addr (keys %::Objects) {

   310 	my $obj = $::Objects{$addr};

   311 	my ($type, $size, $swept_in, $overlap_count, $dup_addr_count) =

   312 	    ($obj->{'type'}, $obj->{'size'},

   313 	     $obj->{'swept_in'},

   314 	     $obj->{'overlap_count'},$obj->{'dup_addr_count'});

   316 	my $type_data = $::Types{$type};

   317 	if (! defined $type_data) {

   318 	    $::Types{$type} =

   319 		$type_data = {'count' => 0, 'size' => 0,

   320 			      'max' => $size, 'min' => $size,

   321 			      'swept_in' => 0, 'swept' => 0,

   322 			      'overlap_count' => 0,

   323 			      'dup_addr_count' => 0};

   324 	}

   326 	if (!$size) {

   327 	    $type_data->{'swept'}++;

   328 	    next OBJECT;

   329 	}

   330 	$::TotalSize += $size;

   332 	$type_data->{'count'}++;

   333 	$type_data->{'size'} += $size;

   334 	if (defined $swept_in) {

   335 	    $type_data->{'swept_in'} += $swept_in;

   337 	    if ($::opt_detail) {

   338 		my $type_detail_sizes = $type_data->{'sweep_details_size'};

   339 		my $type_detail_counts;

   340 		if (!defined $type_detail_sizes) {

   341 		    $type_detail_sizes = $type_data->{'sweep_details_size'} = {};

   342 		    $type_detail_counts = $type_data->{'sweep_details_count'} = {};

   343 		} else {

   344 		    $type_detail_counts = $type_data->{'sweep_details_count'};

   345 		}

   347 		my $sweep_details = $obj->{'sweep_details'};

   348 		for my $swept_addr (keys (%{$sweep_details})) {

   349 		    my $swept_obj = $::Objects{$swept_addr};

   350 		    my $swept_type = $swept_obj->{'type'};

   351 		    $type_detail_sizes->{$swept_type} += $sweep_details->{$swept_addr};

   352 		    $type_detail_counts->{$swept_type}++;

   353 		}

   354 	    }

   355 	}

   356 	if (defined $overlap_count) {

   357 	    $type_data->{'overlap_count'} += $overlap_count;

   358 	}

   360 	if (defined $dup_addr_count) {

   361 	    $type_data->{'dup_addr_count'} += $dup_addr_count;

   362 	}

   364 	if ($type_data->{'max'} < $size) {

   365 	    $type_data->{'max'} = $size;

   366 	}

   367 	# Watch out for case where min is produced by a swept object

   368 	if (!$type_data->{'min'} || $type_data->{'min'} > $size) {

   369 	    $type_data->{'min'} = $size;

   370 	}

   371     }

   372 }

   375 #----------------------------------------------------------------------

   376 sub print_type_table(){

   377     if (!$::opt_showtype) {

   378 	return;

   379     }

   380     my $line_count = 0;

   381     my $bytes_printed_tally = 0;

   383     # Display type summary information

   384     my @sorted_types = keys (%::Types);

   385     print "There are ", 1 + $#sorted_types, " types containing ", $::TotalSize, " bytes\n";

   386     @sorted_types = sort {$::Types{$b}->{'size'}

   387 			  <=> $::Types{$a}->{'size'} } @sorted_types;

   389     foreach my $type (@sorted_types) {

   390 	last if ($line_count++ == $::opt_showtype);

   392 	my $type_data = $::Types{$type};

   393 	$bytes_printed_tally += $type_data->{'size'};

   395 	if ($type_data->{'count'}) {

   396 	    printf "%.2f%% ", $type_data->{'size'} * 100.0/$::TotalSize;

   397 	    print $type_data->{'size'},

   398 	    "\t(",

   399 	    $type_data->{'min'}, "/",

   400 	    int($type_data->{'size'} / $type_data->{'count'}),"/",

   401 	    $type_data->{'max'}, ")";

   402 	    print "\t", $type_data->{'count'},

   403 	    " x ";

   404 	}

   405 	print $type;

   407 	if ($type_data->{'swept_in'}) {

   408 	    print ", $type_data->{'swept_in'} sub-objs absorbed";

   409 	}

   410 	if ($type_data->{'swept'}) {

   411 	    print ", $type_data->{'swept'} swept away";

   412 	}

   413 	if ($type_data->{'overlap_count'}) {

   414 	    print ", $type_data->{'overlap_count'} range overlaps";

   415 	}

   416 	if ($type_data->{'dup_addr_count'}) {

   417 	    print ", $type_data->{'dup_addr_count'} duplicated addresses";

   418 	}

   420 	print "\n" ;

   421 	if (defined $type_data->{'sweep_details_size'}) {

   422 	    my $sizes = $type_data->{'sweep_details_size'};

   423 	    my $counts = $type_data->{'sweep_details_count'};

   424 	    my @swept_types = sort {$sizes->{$b} <=> $sizes->{$a}} keys (%{$sizes});

   426 	    for my $type (@swept_types) {

   427 		printf "    %.2f%% ", $sizes->{$type} * 100.0/$::TotalSize;

   428 		print "$sizes->{$type}     (", int($sizes->{$type}/$counts->{$type}) , ")   $counts->{$type} x $type\n";

   429 	    }

   430 	    print "    ---------------\n";

   431 	}

   432     }

   433     if ($bytes_printed_tally != $::TotalSize) {

   434 	printf "%.2f%% ", ($::TotalSize- $bytes_printed_tally) * 100.0/$::TotalSize;

   435 	print $::TotalSize - $bytes_printed_tally, "\t not shown due to truncation of type list\n";

   436 	print "Currently only data on $::opt_showtype types are displayed, due to command \n",

   437 	    "line argument '--showtype=$::opt_showtype'\n\n";

   438     }

   440 }

   442 #----------------------------------------------------------------------

   443 #

   444 # Check for duplicate address ranges is Objects table, and

   445 # create list of sorted addresses for doing pointer-chasing

   447 sub validate_address_ranges() {

   448     # Build sorted list of address for validating interior pointers

   449     @::SortedAddresses = sort {$a <=> $b} keys %::Objects;

   451     # Validate non-overlap of memory

   452     my $prev_addr_end = -1;

   453     my $prev_addr = -1;

   454     my $index = 0;

   455     my $overlap_tally = 0; # overlapping object memory

   456     my $unused_tally = 0;  # unused memory between blocks

   457     while ($index <= $#::SortedAddresses) {

   458 	my $address = $::SortedAddresses[$index];

   459 	if ($prev_addr_end > $address) {

   460 	    print "Object overlap from $::Objects{$prev_addr}->{'type'}:$prev_addr-$prev_addr_end into";

   461 	    my $test_index = $index;

   462 	    my $prev_addr_overlap_tally = 0;

   464 	    while ($test_index <=  $#::SortedAddresses) {

   465 		my $test_address = $::SortedAddresses[$test_index];

   466 		last if ($prev_addr_end < $test_address);

   467 		print " $::Objects{$test_address}->{'type'}:$test_address";

   469 		$::Objects{$prev_addr}->{'overlap_count'}++;

   470 		$::Objects{$test_address}->{'overlap_count'}++;

   471 		my $overlap = $prev_addr_end - $test_address;

   472 		if ($overlap > $::Objects{$test_address}->{'size'}) {

   473 		    $overlap = $::Objects{$test_address}->{'size'};

   474 		}

   475 		print "($overlap bytes)";

   476 		$prev_addr_overlap_tally += $overlap;

   478 		$test_index++;

   479 	    }

   480 	    print " [total $prev_addr_overlap_tally bytes]";

   481 	    $overlap_tally += $prev_addr_overlap_tally;

   482 	    print "\n";

   483 	}

   485 	$prev_addr = $address;

   486 	$prev_addr_end = $prev_addr + $::Objects{$prev_addr}->{'size'} - 1;

   487 	$index++;

   488     } #end while

   489     if ($overlap_tally) {

   490 	print "Total overlap of $overlap_tally bytes\n";

   491     }

   492 }

   494 #----------------------------------------------------------------------

   495 #

   496 # Evaluate sizes of interobject spacing (fragmentation loss?)

   497 # Gather the sizes into histograms for analysis

   498 # This function assumes a sorted list of addresses is present globally

   500 sub generate_and_print_unused_memory_histogram() {

   501     print "\nInterobject spacing (fragmentation waste) Statistics\n";

   502     if ($::opt_fragment <= 1) {

   503 	print "Statistics are not being gathered.  Use '--fragment=10' to get stats\n";

   504 	return;

   505     }

   506     print "Ratio of histogram buckets will be a factor of $::opt_fragment\n";

   508     my $prev_addr_end = -1;

   509     my $prev_addr = -1;

   510     my $index = 0;

   512     my @fragment_count;

   513     my @fragment_tally;

   514     my $power;

   515     my $bucket_size;

   517     my $max_power = 0;

   519     my $tally_sizes = 0;

   521     while ($index <= $#::SortedAddresses) {

   522 	my $address = $::SortedAddresses[$index];

   524 	my $unused = $address - $prev_addr_end;

   526 	# handle overlaps gracefully

   527 	if ($unused < 0) {

   528 	    $unused = 0;

   529 	}

   531 	$power = 0;

   532 	$bucket_size = 1;

   533 	while ($bucket_size < $unused) {

   534 	    $bucket_size *= $::opt_fragment;

   535 	    $power++;

   536 	}

   537 	$fragment_count[$power]++;

   538 	$fragment_tally[$power] += $unused;

   539 	if ($power > $max_power) {

   540 	    $max_power = $power;

   541 	}

   542 	my $size = $::Objects{$address}->{'size'};

   543 	$tally_sizes += $size;

   544 	$prev_addr_end = $address + $size - 1;

   545 	$index++;

   546     }

   549     $power = 0;

   550     $bucket_size = 1;

   551     print "Basic gap histogram is (max_size:count):\n";

   552     while ($power <= $max_power) {

   553 	if (! defined $fragment_count[$power]) {

   554 	    $fragment_count[$power] = $fragment_tally[$power] = 0;

   555 	}

   556 	printf " %.1f:", $bucket_size;

   557 	print $fragment_count[$power];

   558 	$power++;

   559 	$bucket_size *= $::opt_fragment;

   560     }

   561     print "\n";

   563     print "Summary gap analysis:\n";

   565     $power = 0;

   566     $bucket_size = 1;

   567     my $tally = 0;

   568     my $count = 0;

   569     while ($power <= $max_power) {

   570 	$count += $fragment_count[$power];

   571 	$tally += $fragment_tally[$power];

   572 	print "$count gaps, totaling $tally bytes, were under ";

   573 	printf "%.1f bytes each", $bucket_size;

   574 	if ($count) {

   575 	    printf ", for an average of %.1f bytes per gap", $tally/$count, ;

   576 	}

   577 	print "\n";

   578 	$power++;

   579 	$bucket_size *= $::opt_fragment;

   580     }

   582     print "Total allocation was $tally_sizes bytes, or ";

   583     printf "%.0f bytes per allocation block\n\n", $tally_sizes/($count+1);

   585 }

   587 #----------------------------------------------------------------------

   588 #

   589 # Now thread the parents and children together by looking through the

   590 # slots for each object.

   591 #

   592 sub create_parent_links(){

   593     my $min_addr = $::SortedAddresses[0];

   594     my $max_addr = $::SortedAddresses[ $#::SortedAddresses]; #allow one beyond each object

   595     $max_addr += $::Objects{$max_addr}->{'size'};

   597     print "Viable addresses range from $min_addr to $max_addr for a total of ",

   598     $max_addr-$min_addr, " bytes\n\n";

   600     # Gather stats as we try to convert slots to children

   601     my $slot_count = 0;   # total slots examined

   602     my $fixed_addr_count = 0; # slots into interiors that were adjusted

   603     my $parent_child_count = 0;  # Number of parent-child links

   604     my $child_count = 0;   # valid slots, discounting sibling twins

   605     my $child_dup_count = 0; # number of duplicate child pointers

   606     my $self_pointer_count = 0; # count of discarded self-pointers

   608     foreach my $parent (keys %::Objects) {

   609 	# We'll collect a list of this parent object's children

   610 	# by iterating through its slots.

   611 	my @children;

   612 	my %children_hash;

   613 	my $self_pointer = 0;

   615 	my @slots = @{$::Objects{$parent}->{'slots'}};

   616 	$slot_count += $#slots + 1;

   617 	SLOT: foreach my $child (@slots) {

   619 	    # We only care about pointers that refer to other objects

   620 	    if (! defined $::Objects{$child}) {

   621 		# check to see if we are an interior pointer

   623 		# Punt if we are completely out of range

   624 		next SLOT unless ($max_addr >= $child &&

   625 				  $child >= $min_addr);

   627 		# Do binary search to find object below this address

   628 		my ($min_index, $beyond_index) = (0, $#::SortedAddresses + 1);

   629 		my $test_index;

   630 		while ($min_index !=

   631 		       ($test_index = int (($beyond_index+$min_index)/2)))  {

   632 		    if ($child >= $::SortedAddresses[$test_index]) {

   633 			$min_index = $test_index;

   634 		    } else {

   635 			$beyond_index = $test_index;

   636 		    }

   637 		}

   638 		# See if pointer is within extent of this object

   639 		my $address = $::SortedAddresses[$test_index];

   640 		next SLOT unless ($child <

   641 				  $address + $::Objects{$address}->{'size'});

   643 		# Make adjustment so we point to the actual child precisely

   644 		$child = $address;

   645 		$fixed_addr_count++;

   646 	    }

   648 	    if ($child == $parent) {

   649 		$self_pointer_count++;

   650 		next SLOT; # Discard self-pointers

   651 	    }

   653 	    # Avoid creating duplicate child-parent links

   654 	    if (! defined $children_hash{$child}) {

   655 		$parent_child_count++;

   656 		# Add the parent to the child's list of parents

   657 		my $parents = $::Objects{$child}->{'parents'};

   658 		if (! $parents) {

   659 		    $parents = $::Objects{$child}->{'parents'} = [];

   660 		}

   662 		$parents->[scalar(@$parents)] = $parent;

   664 		# Add the child to the parent's list of children

   665 		$children_hash{$child} = 1;

   666 	    } else {

   667 		$child_dup_count++;

   668 	    }

   669 	}

   670 	@children = keys %children_hash;

   671 	# Track tally of unique children linked

   672 	$child_count += $#children + 1;

   674 	$::Objects{$parent}->{'children'} = \@children;

   676 	if (! @children) {

   677 	    $::Leafs[$#::Leafs + 1] = $parent;

   678 	}

   679     }

   680     print "Scanning $#::SortedAddresses objects, we found $parent_child_count parents-to-child connections by chasing $slot_count pointers.\n",

   681     "This required $fixed_addr_count interior pointer fixups, skipping $child_dup_count duplicate pointers, ",

   682     "and $self_pointer_count self pointers\nAlso discarded ",

   683     $slot_count - $parent_child_count -$self_pointer_count - $child_dup_count,

   684     " out-of-range pointers\n\n";

   685 }

   688 #----------------------------------------------------------------------

   689 # For every leaf, if a leaf has only one parent, then sweep the memory

   690 # cost into the parent from the leaf

   691 sub sweep_leaf_memory () {

   692     my $sweep_count = 0;

   693     my $leaf_counter = 0;

   694     LEAF: while ($leaf_counter <= $#::Leafs) {

   695 	my $leaf_addr = $::Leafs[$leaf_counter++];

   696 	my $leaf_obj = $::Objects{$leaf_addr};

   697 	my $parents = $leaf_obj->{'parents'};

   699 	next LEAF if (! defined($parents) || 1 != scalar(@$parents));

   701 	# We have only one parent, so we'll try to sweep upwards

   702 	my $parent_addr = @$parents[0];

   703 	my $parent_obj = $::Objects{$parent_addr};

   705 	# watch out for self-pointers

   706 	next LEAF if ($parent_addr == $leaf_addr);

   708 	if ($::opt_detail) {

   709 	    foreach my $obj ($parent_obj, $leaf_obj) {

   710 		if (!defined $obj->{'original_size'}) {

   711 		    $obj->{'original_size'} = $obj->{'size'};

   712 		}

   713 	    }

   714 	    if (defined $leaf_obj->{'sweep_details'}) {

   715 		if (defined $parent_obj->{'sweep_details'}) { # merge details

   716 		    foreach my $swept_obj (keys (%{$leaf_obj->{'sweep_details'}})) {

   717 			%{$parent_obj->{'sweep_details'}}->{$swept_obj} =

   718 			    %{$leaf_obj->{'sweep_details'}}->{$swept_obj};

   719 		    }

   720 		} else { # No parent info

   721 		    $parent_obj->{'sweep_details'} = \%{$leaf_obj->{'sweep_details'}};

   722 		}

   723 		delete $leaf_obj->{'sweep_details'};

   724 	    } else { # no leaf detail

   725 		if (!defined $parent_obj->{'sweep_details'}) {

   726 		    $parent_obj->{'sweep_details'} = {};

   727 		}

   728 	    }

   729 	    %{$parent_obj->{'sweep_details'}}->{$leaf_addr} = $leaf_obj->{'original_size'};

   730 	}

   732 	$parent_obj->{'size'} += $leaf_obj->{'size'};

   733 	$leaf_obj->{'size'} = 0;

   735 	if (defined ($leaf_obj->{'swept_in'})) {

   736 	    $parent_obj->{'swept_in'} += $leaf_obj->{'swept_in'};

   737 	    $leaf_obj->{'swept_in'} = 0;  # sweep has been handed off to parent

   738 	}

   739 	$parent_obj->{'swept_in'} ++;  # tally swept in leaf_obj

   741 	$sweep_count++;

   743 	# See if we created another leaf

   744 	my $consumed_children = $parent_obj->{'consumed'}++;

   745 	my @children = $parent_obj->{'children'};

   746 	if ($consumed_children == $#children) {

   747 	    $::Leafs[$#::Leafs + 1] = @$parents[0];

   748 	}

   749     }

   750     print "Processed ", $leaf_counter, " leaves sweeping memory to parents in ", $sweep_count, " objects\n";

   751 }

   754 #----------------------------------------------------------------------

   755 #

   756 # Subdivide the types of objects that are in our "expand" list

   757 # List types that should be sub-divided based on parents, and possibly

   758 # children

   759 # The argument supplied is a hash table with keys selecting types that

   760 # need to be "refined" by including the types of the parent objects,

   761 # and (when we are desparate) the types of the children objects.

   763 sub expand_type_names($) {

   764     my %TypeExpand = %{$_[0]};

   766     my @retype; # array of addrs that get extended type names

   767     foreach my $child (keys %::Objects) {

   768 	my $child_obj = $::Objects{$child};

   769 	next unless (defined ($TypeExpand{$child_obj->{'type'}}));

   771 	foreach my $relation ('parents','children') {

   772 	    my $relatives = $child_obj->{$relation};

   773 	    next unless defined @$relatives;

   775 	    # Sort out the names of the types of the relatives

   776 	    my %names;

   777 	    foreach my $relative (@$relatives) {

   778 		%names->{$::Objects{$relative}->{'type'}} = 1;

   779 	    }

   780 	    my $related_type_names = join(',' , sort(keys(%names)));

   783 	    $child_obj->{'name' . $relation} = $related_type_names;

   785 	    # Don't bother with children if we have significant parent types

   786 	    last if (!defined ($TypeExpand{$related_type_names}));

   787 	}

   788 	$retype[$#retype + 1] = $child;

   789     }

   791     # Revisit all addresses we've marked

   792     foreach my $child (@retype) {

   793 	my $child_obj = $::Objects{$child};

   794 	$child_obj->{'type'} = $TypeExpand{$child_obj->{'type'}};

   795 	my $extended_type = $child_obj->{'namechildren'};

   796 	if (defined $extended_type) {

   797 	    $child_obj->{'type'}.= "->(" . $extended_type . ")";

   798 	    delete ($child_obj->{'namechildren'});

   799 	}

   800 	$extended_type = $child_obj->{'nameparents'};

   801 	if (defined $extended_type) {

   802 	    $child_obj->{'type'} = "(" . $extended_type . ")->" . $::Objects{$child}->{'type'};

   803 	    delete ($child_obj->{'nameparents'});

   804 	}

   805     }

   806 }

   808 #----------------------------------------------------------------------

   809 #

   810 # Print out a type histogram

   812 sub print_type_histogram() {

   813     load_type_table();

   814     print_type_table();

   815     print "\n\n";

   816 }

   819 #----------------------------------------------------------------------

   820 # Provide a nice summary of the types during the process

   821 validate_address_ranges();

   822 create_parent_links();

   824 print "\nBasic memory use histogram is:\n";

   825 print_type_histogram();

   827 generate_and_print_unused_memory_histogram();

   829 sweep_leaf_memory ();

   830 print "After doing basic leaf-sweep processing of instances:\n";

   831 print_type_histogram();

   833 {

   834     foreach my $typename (@::opt_typedivide) {

   835 	my %expansion_table;

   836 	$expansion_table{$typename} = $typename;

   837 	expand_type_names(\%expansion_table);

   838 	print "After subdividing <$typename> based on inbound (and somtimes outbound) pointers:\n";

   839 	print_type_histogram();

   840     }

   841 }

   843 exit();  # Don't bother with SCCs yet.

   846 #----------------------------------------------------------------------

   847 #

   848 # Determine objects that entrain equivalent sets, using the strongly

   849 # connected component algorithm from Cormen, Leiserson, and Rivest,

   850 # ``An Introduction to Algorithms'', MIT Press 1990, pp. 488-493.

   851 #

   852 sub compute_post_order($$$) {

   853 # This routine produces a post-order of the call graph (what CLR call

   854 # ``ordering the nodes by f[u]'')

   855     my ($parent, $visited, $finish) = @_;

   857     # Bail if we've already seen this node

   858     return if $visited->{$parent};

   860     # We have now!

   861     $visited->{$parent} = 1;

   863     # Walk the children

   864     my $children = $::Objects{$parent}->{'children'};

   866     foreach my $child (@$children) {

   867         compute_post_order($child, $visited, $finish);

   868     }

   870     # Now that we've walked all the kids, we can append the parent to

   871     # the post-order

   872     @$finish[scalar(@$finish)] = $parent;

   873 }

   875 sub compute_equivalencies($$$) {

   876 # This routine recursively computes equivalencies by walking the

   877 # transpose of the callgraph.

   878     my ($child, $table, $equivalencies) = @_;

   880     # Bail if we've already seen this node

   881     return if $table->{$child};

   883     # Otherwise, append ourself to the list of equivalencies...

   884     @$equivalencies[scalar(@$equivalencies)] = $child;

   886     # ...and note our other equivalents in the table

   887     $table->{$child} = $equivalencies;

   889     my $parents = $::Objects{$child}->{'parents'};

   891     foreach my $parent (@$parents) {

   892         compute_equivalencies($parent, $table, $equivalencies);

   893     }

   894 }

   896 sub compute_equivalents() {

   897 # Here's the strongly connected components algorithm. (Step 2 has been

   898 # done implictly by our object graph construction.)

   899     my %visited;

   900     my @finish;

   902     # Step 1. Compute a post-ordering of the object graph

   903     foreach my $parent (keys %::Objects) {

   904         compute_post_order($parent, \%visited, \@finish);

   905     }

   907     # Step 3. Traverse the transpose of the object graph in reverse

   908     # post-order, collecting vertices into %equivalents

   909     my %equivalents;

   910     foreach my $child (reverse @finish) {

   911         compute_equivalencies($child, \%equivalents, []);

   912     }

   914     # Now, we'll trim the %equivalents table, arbitrarily removing

   915     # ``redundant'' entries.

   916   EQUIVALENT: foreach my $node (keys %equivalents) {

   917       my $equivalencies = $equivalents{$node};

   918       next EQUIVALENT unless $equivalencies;

   920       foreach my $equivalent (@$equivalencies) {

   921           delete $equivalents{$equivalent} unless $equivalent == $node;

   922       }

   923   }

   925      # Note the equivalent objects in a way that will yield the most

   926      # interesting order as we do depth-first traversal later to

   927      # output them.

   928   ROOT: foreach my $equivalent (reverse @finish) {

   929       next ROOT unless $equivalents{$equivalent};

   930       $::Equivalents[$#::Equivalents + 1] = $equivalent;

   932       # XXX Lame! Should figure out function refs.

   933       $::Objects{$equivalent}->{'entrained-size'} = 0;

   934   }

   935 }

   937 # Do it!

   938 compute_equivalents();

   941 #----------------------------------------------------------------------

   942 #

   943 # Compute the size of each node's transitive closure.

   944 #

   945 sub compute_entrained($$) {

   946     my ($parent, $visited) = @_;

   948     $visited->{$parent} = 1;

   950     $::Objects{$parent}->{'entrained-size'} = $::Objects{$parent}->{'size'};

   952     my $children = $::Objects{$parent}->{'children'};

   953     CHILD: foreach my $child (@$children) {

   954         next CHILD if $visited->{$child};

   956         compute_entrained($child, $visited);

   957         $::Objects{$parent}->{'entrained-size'} += $::Objects{$child}->{'entrained-size'};

   958     }

   959 }

   961 if (! $::opt_noentrained) {

   962     my %visited;

   964   PARENT: foreach my $parent (@::Equivalents) {

   965       next PARENT if $visited{$parent};

   966       compute_entrained($parent, \%visited);

   967   }

   968 }

   971 #----------------------------------------------------------------------

   972 #

   973 # Converts a shared library and an address into a file and line number

   974 # using a bunch of addr2line processes.

   975 #

   976 sub addr2line($$) {

   977     my ($dso, $addr) = @_;

   979     # $::Addr2Lines is a global table that maps a DSO's name to a pair

   980     # of filehandles that are talking to an addr2line process.

   981     my $fhs = $::Addr2Lines{$dso};

   982     if (! $fhs) {

   983         if (!(-r $dso)) {

   984             # bogus filename (that happens sometimes), so bail

   985             return { 'dso' => $dso, 'addr' => $addr };

   986         }

   987         my ($in, $out) = (new FileHandle, new FileHandle);

   988         open2($in, $out, "addr2line --exe=$dso") || die "unable to open addr2line --exe=$dso";

   989         $::Addr2Lines{$dso} = $fhs = { 'in' => $in, 'out' => $out };

   990     }

   992     # addr2line takes a hex address as input...

   993     $fhs->{'out'}->print($addr . "\n");

   995     # ...and'll return file:lineno as output

   996     if ($fhs->{'in'}->getline() =~ /([^:]+):(.+)/) {

   997         return { 'file' => $1, 'line' => $2 };

   998     }

   999     else {

  1000         return { 'dso' => $dso, 'addr' => $addr };

  1001     }

  1002 }

  1005 #----------------------------------------------------------------------

  1006 #

  1007 # Dump the objects, using a depth-first traversal.

  1008 #

  1009 sub dump_objects($$$) {

  1010     my ($parent, $visited, $depth) = @_;

  1012     # Have we already seen this?

  1013     my $already_visited = $visited->{$parent};

  1014     return if ($depth == 0 && $already_visited);

  1016     if (! $already_visited) {

  1017         $visited->{$parent} = 1;

  1018         $::Total += $::Objects{$parent}->{'size'};

  1019     }

  1021     my $parententry = $::Objects{$parent};

  1023     # Make an ``object'' div, which'll contain an ``object'' span, two

  1024     # ``toggle'' spans, an invisible ``stack'' div, and the invisible

  1025     # ``children'' div.

  1026     print "<div class='object'>";

  1028     if ($already_visited) {

  1029         print "<a href='#$parent'>";

  1030     }

  1031     else {

  1032         print "<span id='$parent' class='object";

  1033         print " root" if $depth == 0;

  1034         print "'>";

  1035     }

  1037     printf "0x%x&lt;%s&gt;[%d]", $parent, $parententry->{'type'}, $parententry->{'size'};

  1039     if ($already_visited) {

  1040         print "</a>";

  1041         goto DONE;

  1042     }

  1044     if ($depth == 0) {

  1045         print "($parententry->{'entrained-size'})"

  1046             if $parententry->{'entrained-size'};

  1048         print "&nbsp;<span class='toggle' onclick='toggleDisplay(this.parentNode.nextSibling.nextSibling);'>Children</span>"

  1049             if @{$parententry->{'children'}} > 0;

  1050     }

  1052     if (($depth == 0 || !$::opt_nochildstacks) && !$::opt_nostacks) {

  1053         print "&nbsp;<span class='toggle' onclick='toggleDisplay(this.parentNode.nextSibling);'>Stack</span>";

  1054     }

  1056     print "</span>";

  1058     # Print stack traces

  1059     print "<div class='stack'>\n";

  1061     if (($depth == 0 || !$::opt_nochildstacks) && !$::opt_nostacks) {

  1062         my $depth = $::opt_depth;

  1064       FRAME: foreach my $frame (@{$parententry->{'stack'}}) {

  1065           # Only go as deep as they've asked us to.

  1066           last FRAME unless --$depth >= 0;

  1068           # Stack frames look like ``mangled_name[dso address]''

  1069           $frame =~ /([^\]]+)\[(.*) \+0x([0-9A-Fa-f]+)\]/;

  1071           # Convert address to file and line number

  1072           my $mangled = $1;

  1073           my $result = addr2line($2, $3);

  1075           if ($result->{'file'}) {

  1076               # It's mozilla source! Clean up refs to dist/include

  1077               if (($result->{'file'} =~ s/.*\.\.\/\.\.\/dist\/include\//http:\/\/bonsai.mozilla.org\/cvsguess.cgi\?file=/) ||

  1078                   ($result->{'file'} =~ s/.*\/mozilla/http:\/\/bonsai.mozilla.org\/cvsblame.cgi\?file=mozilla/)) {

  1079                   my $prevline = $result->{'line'} - 10;

  1080                   print "<a target=\"lxr_source\" href=\"$result->{'file'}\&mark=$result->{'line'}#$prevline\">$mangled</a><br>\n";

  1081               }

  1082               else {

  1083                   print "$mangled ($result->{'file'}, line $result->{'line'})<br>\n";

  1084               }

  1085           }

  1086           else {

  1087               print "$result->{'dso'} ($result->{'addr'})<br>\n";

  1088           }

  1089       }

  1091     }

  1093     print "</div>";

  1095     # Recurse to children

  1096     if (@{$parententry->{'children'}} >= 0) {

  1097         print "<div class='children'>\n" if $depth == 0;

  1099         foreach my $child (@{$parententry->{'children'}}) {

  1100             dump_objects($child, $visited, $depth + 1);

  1101         }

  1103         print "</div>" if $depth == 0;

  1104     }

  1106   DONE:

  1107     print "</div>\n";

  1108 }

  1111 #----------------------------------------------------------------------

  1112 #

  1113 # Do the output.

  1114 #

  1116 # Force flush on STDOUT. We get funky output unless we do this.

  1117 $| = 1;

  1119 # Header

  1120 print "<html>

  1121 <head>

  1122 <title>Object Graph</title>

  1123 <style type='text/css'>

  1124     body { font: medium monospace; background-color: white; }

  1126     /* give nested div's some margins to make it look like a tree */

  1127     div.children > div.object { margin-left: 1em; }

  1128     div.object > div.object { margin-left: 1em; }

  1130     /* Indent stacks, too */

  1131     div.object > div.stack { margin-left: 3em; }

  1133     /* apply font decorations to special ``object'' spans */

  1134     span.object { font-weight: bold; color: darkgrey; }

  1135     span.object.root { color: black; }

  1137     /* hide ``stack'' divs by default; JS will show them */

  1138     div.stack { display: none; }

  1140     /* hide ``children'' divs by default; JS will show them */

  1141     div.children { display: none; }

  1143     /* make ``toggle'' spans look like links */

  1144     span.toggle { color: blue; text-decoration: underline; cursor: pointer; }

  1145     span.toggle:active { color: red; }

  1146 </style>

  1147 <script language='JavaScript'>

  1148 function toggleDisplay(element)

  1149 {

  1150     element.style.display = (element.style.display == 'block') ? 'none' : 'block';

  1151 }

  1152 </script>

  1153 </head>

  1154 <body>

  1155 ";

  1157 {

  1158 # Body. Display ``roots'', sorted by the amount of memory they

  1159 # entrain. Because of the way we've sorted @::Equivalents, we should

  1160 # get a nice ordering that sorts things with a lot of kids early

  1161 # on. This should yield a fairly "deep" depth-first traversal, with

  1162 # most of the objects appearing as children.

  1163 #

  1164 # XXX I sure hope that Perl implements a stable sort!

  1165     my %visited;

  1167     foreach my $parent (sort { $::Objects{$b}->{'entrained-size'}

  1168                                <=> $::Objects{$a}->{'entrained-size'} }

  1169                         @::Equivalents) {

  1170         dump_objects($parent, \%visited, 0);

  1171         print "\n";

  1172     }

  1173 }

  1175 # Footer

  1176 print "<br> $::Total total bytes\n" if $::Total;

  1177 print "</body>

  1178 </html>

  1179 ";

The Tor Browser / file revision

tools/trace-malloc/leak-soup.pl@97036ab72558

tools/trace-malloc/leak-soup.pl