#!/usr/bin/perl -w

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# This script is a bunch of utilities for computing statistics about the
# textruns created during a Gecko run.
#
# Usage:
# 1) Uncomment #define DUMP_TEXT_RUNS in gfxAtsuiFonts.cpp
# 2) Build
# 3) Run over some test set, redirecting stdout to a file
# 4) Pipe that file through this script
#
# Input is always read from standard input; command-line arguments are
# treated purely as option flags and never as file names.

use strict;
use warnings;

# --exclude-spaces-only: ignore all textruns that consist of zero or more
# spaces
my $exclude_spaces = grep { $_ eq '--exclude-spaces-only' } @ARGV;

# --dump-runs: process textruns into a format that can be used by
# gfxTextRunPerfTest, print that on standard output, and do nothing else
my $dump_runs = grep { $_ eq '--dump-runs' } @ARGV;

# --obfuscate: ROT13 the textrun text
my $obfuscate = grep { $_ eq '--obfuscate' } @ARGV;

# Each element is a hash ref with the keys fontgroup, families and text.
my @textruns = ();

# Parse the dump. A textrun is either fully contained in one line
# (... TEXTRUN "text" ENDTEXTRUN) or, when the text itself contains
# newlines, spread over several lines up to the closing `" ENDTEXTRUN`.
# STDIN is read explicitly: the diamond operator would try to open the
# option flags in @ARGV as input files.
while (my $line = <STDIN>) {
    if ($line =~ /^0x(\w+)\((.*)\) TEXTRUN "(.*)" ENDTEXTRUN$/) {
        push(@textruns, { fontgroup => $1, families => $2, text => $3 });
    }
    elsif ($line =~ /^0x(\w+)\((.*)\) TEXTRUN "(.*)$/) {
        my %tr = ( fontgroup => $1, families => $2 );
        my $text = $3 . "\n";
        # Accumulate continuation lines verbatim until the terminator.
        # If the input ends first, the partial text is still recorded.
        while (my $more = <STDIN>) {
            if ($more =~ /^(.*)" ENDTEXTRUN$/) {
                $text .= $1;
                last;
            }
            $text .= $more;
        }
        $tr{text} = $text;
        push(@textruns, \%tr);
    }
}

# Characters that need a backslash escape inside a C string literal.
my %quote = ( "\\" => 1, "\"" => 1 );

# quote_str($text): return $text as a double-quoted C string literal.
# Characters with ordinal >= 0x80 become \x escapes followed by `""`, which
# ends the literal and starts a new one so that a following hex digit is not
# absorbed into the escape. Backslash and double quote are backslash-escaped,
# and each newline is replaced by a space.
sub quote_str {
    my ($text) = @_;
    my @strs = ();
    foreach my $c (split(//, $text)) {
        if (ord($c) >= 0x80) {
            $c = "\\x" . sprintf("%x", ord($c)) . '""';
        }
        elsif ($quote{$c}) {
            $c = "\\$c";
        }
        elsif ($c eq "\n") {
            $c = " ";
        }
        push(@strs, $c);
    }
    return '"' . join("", @strs) . '"';
}

if ($dump_runs) {
    # Emit one `{ "families", "text" },` initializer per textrun.
    foreach my $tr (@textruns) {
        print "{ ", quote_str($tr->{families}), ",\n";
        my $text = $tr->{text};
        if ($obfuscate) {
            $text =~ tr/a-mA-Mn-zN-Z/n-zN-Za-mA-M/;
        }
        print " ", quote_str($text), " },\n";
    }
    exit(0);
}

my %trs_by_text = ();
my %trs_by_text_and_fontgroup = ();
my %trs_by_trimmed_text_and_fontgroup = ();
my @tr_lengths = ();

# Pre-create these buckets so the counts below work when no such run exists.
$trs_by_text{" "} = [];
$trs_by_text{""} = [];

# trim($s): return $s with leading and trailing spaces removed (only the
# space character, not other whitespace).
sub trim {
    my ($s) = @_;
    $s =~ s/^ *//g;
    $s =~ s/ *$//g;
    return $s;
}

my $total_textruns = 0;

foreach my $tr (@textruns) {
    my $text = $tr->{text};
    if ($exclude_spaces && $text =~ /^ *$/) {
        next;
    }
    ++$total_textruns;

    # The fontgroup matched \w+, so it can never contain a NUL byte; using
    # NUL as the separator keeps ("ab", "c") and ("abc", "") in distinct
    # buckets, which plain concatenation would merge.
    my $full_key = $tr->{fontgroup} . "\0" . $text;
    my $trimmed_key = $tr->{fontgroup} . "\0" . trim($text);

    push(@{$trs_by_text{$text}}, $tr);
    push(@{$trs_by_text_and_fontgroup{$full_key}}, $tr);
    push(@{$trs_by_trimmed_text_and_fontgroup{$trimmed_key}}, $tr);

    # Only runs whose (fontgroup, trimmed text) pair has been seen before
    # contribute to the length distribution.
    if (1 < scalar(@{$trs_by_trimmed_text_and_fontgroup{$trimmed_key}})) {
        $tr_lengths[length($text)]++;
    }
}

print "Number of textruns:\t$total_textruns\n";
print "Number of textruns which are one space:\t", scalar(@{$trs_by_text{" "}}), "\n";
print "Number of textruns which are empty:\t", scalar(@{$trs_by_text{""}}), "\n";

my $count = 0;
foreach my $k (keys(%trs_by_text)) {
    if ($k =~ /^ *$/) {
        $count += @{$trs_by_text{$k}};
    }
}
print "Number of textruns which are zero or more spaces:\t$count\n";

print "Number of unique textruns by text and fontgroup:\t", scalar(keys(%trs_by_text_and_fontgroup)), "\n";
print "Number of unique textruns by trimmed text and fontgroup:\t", scalar(keys(%trs_by_trimmed_text_and_fontgroup)), "\n";

# Cumulative distribution: for each length, the running number of counted
# textruns and the running total of their characters.
my $sum = 0;
my $weighted_sum = 0;
print "Textrun length distribution:\n";
for my $i (0 .. $#tr_lengths) {
    my $amount = defined($tr_lengths[$i]) ? $tr_lengths[$i] : 0;
    $sum += $amount;
    $weighted_sum += $i * $amount;
    print "$i\t$sum\t$weighted_sum\n";
}