intl/chardet/tools/charfreq.pl

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 #!/usr/bin/perl
     2 #!/usr/bin/perl 
     3 #
     4 # This Source Code Form is subject to the terms of the Mozilla Public
     5 # License, v. 2.0. If a copy of the MPL was not distributed with this
     6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     7 open (STAT,$ARGV[0]) || die " cannot open data file $ARGV[0]\n";
     8 @count;
     9 while(<STAT>)
    10 {
    11    @k = split(/\s+/, $_);
    12    $count{$k[0]} = $k[1];
    13 }
    14 $count = 0;
    15 while(<STDIN>)
    16 {
    17   @ck = split /\s*/, $_;
    18   $s = 0;
    19   $fb = 0;
    20   $cl = $#ck;
    21   $j = 0;
    22   while($j < $cl) {
    23      $cc = unpack("C", $ck[$j]);
    24      if(0 eq $s ) {
    25        if($cc > 0x80) {
    26          if($cc > 0xa0) {
    27            $fb = $ck[$j];
    28            $s = 2;
    29          } else {
    30            $s = 1;
    31          }
    32        } 
    33      } elsif (1 eq $s) {
    34      } else {
    35          if($cc > 0xa0) {
    36            $fb .= $ck[$j];
    37            $count{$fb}++;
    38            print $fb . " "  .$count{$fb} . "\n";
    39            $s = 0;
    40          } else {
    41            $s = 1;
    42          }
    43      }
    44      $j = $j + 1;
    45   }
    46 }
    47 foreach $c (sort(keys( %count )))
    48 {
    49    print $c . " ". $count{$c} . "\n";
    50 }

mercurial