intl/chardet/tools/charfreq.pl

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/chardet/tools/charfreq.pl	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,50 @@
     #!/usr/bin/perl
     #
     # This Source Code Form is subject to the terms of the Mozilla Public
     # License, v. 2.0. If a copy of the MPL was not distributed with this
     # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     #
     # charfreq.pl - accumulate frequencies of two-byte sequences.
     #
     # Usage: charfreq.pl STATFILE < text
     #
     # STATFILE supplies the starting counts, one "<key> <count>" pair per line
     # separated by whitespace.  Text is then read from STDIN line by line; every
     # pair of consecutive bytes that are both greater than 0xA0 is treated as one
     # key and its count is incremented.  The running count is printed each time
     # a pair is seen, and the complete table (sorted by key) is printed at the
     # end.
     #
     # NOTE(review): the byte ranges look like an EUC-style double-byte encoding
     # (lead and trail bytes in 0xA1..0xFF) -- confirm against the intended
     # input data.

     use strict;
     use warnings;

     my $stat_path = $ARGV[0];
     defined $stat_path
         or die "usage: $0 STATFILE < text\n";

     # Three-argument open with a lexical handle: the file name can never be
     # misread as an open mode, and the failure reason is reported.
     open(my $stat_fh, '<', $stat_path)
         or die " cannot open data file $stat_path: $!\n";

     # Counts keyed by the raw two-byte sequence.
     my %count_of;

     while (my $stat_line = <$stat_fh>) {
         # split ' ' ignores leading whitespace, so blank or indented lines do
         # not create an empty key.
         my ($key, $value) = split ' ', $stat_line;
         next unless defined $key;
         $count_of{$key} = defined $value ? $value : 0;
     }
     close $stat_fh;

     while (my $text_line = <STDIN>) {
         # Lead byte waiting for its trail byte; undef when no pair is pending.
         # The state never carries over from one input line to the next.
         my $lead;

         # split /\s*/ yields the non-whitespace characters of the line one at
         # a time.  Every one of them is examined, including the last, so a pair
         # that ends the line is counted too.
         for my $char (split /\s*/, $text_line) {
             # A line starting with whitespace produces an empty leading field.
             next unless length $char;

             my $byte = unpack 'C', $char;

             if (!defined $lead) {
                 # Bytes up to 0x80 are passed over without changing state.
                 next if $byte <= 0x80;

                 # 0x81..0xA0 is not accepted as a lead byte: the rest of this
                 # line is ignored.
                 last if $byte <= 0xa0;

                 $lead = $char;
                 next;
             }

             # A pending lead byte must be followed by a byte above 0xA0;
             # anything else makes the rest of this line be ignored.
             last if $byte <= 0xa0;

             my $pair = $lead . $char;
             $count_of{$pair}++;
             print $pair . " " . $count_of{$pair} . "\n";
             undef $lead;
         }
     }

     # Final table, keys in string order.
     for my $key (sort keys %count_of) {
         print $key . " " . $count_of{$key} . "\n";
     }

mercurial