intl/chardet/tools/charfreq.pl

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirement documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 #!/usr/bin/perl
     2 #!/usr/bin/perl 
     3 #
     4 # This Source Code Form is subject to the terms of the Mozilla Public
     5 # License, v. 2.0. If a copy of the MPL was not distributed with this
     6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     7 open (STAT,$ARGV[0]) || die " cannot open data file $ARGV[0]\n";
     8 @count;
     9 while(<STAT>)
    10 {
    11    @k = split(/\s+/, $_);
    12    $count{$k[0]} = $k[1];
    13 }
    14 $count = 0;
    15 while(<STDIN>)
    16 {
    17   @ck = split /\s*/, $_;
    18   $s = 0;
    19   $fb = 0;
    20   $cl = $#ck;
    21   $j = 0;
    22   while($j < $cl) {
    23      $cc = unpack("C", $ck[$j]);
    24      if(0 eq $s ) {
    25        if($cc > 0x80) {
    26          if($cc > 0xa0) {
    27            $fb = $ck[$j];
    28            $s = 2;
    29          } else {
    30            $s = 1;
    31          }
    32        } 
    33      } elsif (1 eq $s) {
    34      } else {
    35          if($cc > 0xa0) {
    36            $fb .= $ck[$j];
    37            $count{$fb}++;
    38            print $fb . " "  .$count{$fb} . "\n";
    39            $s = 0;
    40          } else {
    41            $s = 1;
    42          }
    43      }
    44      $j = $j + 1;
    45   }
    46 }
    47 foreach $c (sort(keys( %count )))
    48 {
    49    print $c . " ". $count{$c} . "\n";
    50 }

mercurial