intl/chardet/tools/charfreq.pl

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 #!/usr/bin/perl
michael@0 2 #!/usr/bin/perl
michael@0 3 #
michael@0 4 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 5 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
use strict;
use warnings;

# Usage: charfreq.pl STATFILE < text
#
# Loads previously accumulated "key count" pairs from STATFILE, then scans
# standard input for two-byte sequences in which both bytes are above 0xA0
# and increments the count of each sequence found.  Every increment is
# echoed as "key count", and the complete table is printed, sorted by key,
# once standard input is exhausted.

# A missing argument is treated as an empty file name so that the open
# below fails with the same message as for any other unreadable file.
my $stat_file = defined $ARGV[0] ? $ARGV[0] : '';

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode or a pipe.
open(my $stat_fh, '<', $stat_file)
    or die " cannot open data file $stat_file\n";

# Frequency table: key (first whitespace-separated field) => count.
my %count;
while (my $stat_line = <$stat_fh>) {
    my ($key, $value) = split(/\s+/, $stat_line);

    # A blank line yields no fields at all; skip it instead of creating
    # an entry under the empty key.
    next unless defined $key;

    $count{$key} = $value;
}
close($stat_fh);

while (my $line = <STDIN>) {
    # Splitting on /\s*/ yields one element per character and discards all
    # whitespace, including the trailing newline.
    my @chars = split /\s*/, $line;

    my $want_trail = 0;     # true once a lead byte (> 0xA0) has been seen
    my $lead       = '';    # the pending lead byte

    # Visit EVERY element.  The previous loop bound ($j < $#ck) stopped one
    # short, so a two-byte sequence ending the line was never counted.
    for my $char (@chars) {
        my $code = unpack("C", $char);

        # Leading whitespace produces one empty first field; ignore it.
        next unless defined $code;

        if (!$want_trail) {
            if ($code > 0xa0) {
                $lead       = $char;
                $want_trail = 1;
            }
            elsif ($code > 0x80) {
                # Byte in 0x81..0xA0 where a lead byte was expected: the
                # rest of the line is ignored, exactly as the original
                # absorbing state did.
                # NOTE(review): presumably a deliberate "give up on this
                # line" rule -- confirm.
                last;
            }
        }
        elsif ($code > 0xa0) {
            my $pair = $lead . $char;
            $count{$pair}++;
            print $pair . " " . $count{$pair} . "\n";
            $want_trail = 0;
        }
        else {
            # Invalid trail byte: abandon the rest of the line.
            last;
        }
    }
}

foreach my $key (sort(keys(%count))) {
    # A stat-file line with a key but no count is stored as undef; print
    # it as an empty string, as before.
    my $value = defined $count{$key} ? $count{$key} : '';
    print $key . " " . $value . "\n";
}

mercurial