#!/usr/bin/perl
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# charfreq.pl (intl/chardet/tools/charfreq.pl)
#
# Usage: charfreq.pl DATAFILE < TEXT
#
# Tallies two-byte sequences found on STDIN in which BOTH bytes are greater
# than 0xA0 (presumably EUC-style double-byte characters -- TODO confirm the
# intended encodings with the chardet maintainers).
#
#   1. DATAFILE holds previously gathered counts, one "KEY COUNT" pair per
#      line, separated by whitespace.  These seed the tally.
#   2. Every line of STDIN is scanned byte by byte, ignoring whitespace.
#      Each time a pair is recognised its counter is incremented and a
#      progress line "PAIR COUNT" is printed immediately.
#   3. After STDIN is exhausted the complete table is printed, sorted by key
#      (plain string order), again as "KEY COUNT" lines.
#
# Dies with a message on STDERR if DATAFILE is missing or cannot be opened.

use strict;
use warnings;

# NOTE: do not add `use v5.12` (or later) / `use feature 'unicode_strings'`
# here.  The scan relies on \s matching only ASCII whitespace in byte
# strings; with Unicode string semantics the bytes 0x85 and 0xA0 would be
# treated as whitespace and silently dropped from the input.

# Bytes up to and including this value are plain single-byte characters.
my $SINGLE_BYTE_MAX = 0x80;

# Both bytes of a counted pair must be strictly greater than this value.
my $PAIR_BYTE_FLOOR = 0xa0;

@ARGV or die "usage: $0 DATAFILE < TEXT\n";
my $stat_file = $ARGV[0];

# --- 1. Seed the tally from the existing statistics file -------------------

my %count;

open(my $stat_fh, '<', $stat_file)
    or die " cannot open data file $stat_file: $!\n";

while (my $entry = <$stat_fh>) {
    # split ' ' skips leading whitespace and splits on whitespace runs.
    my ($key, $seen) = split ' ', $entry;

    # Blank line: nothing to record.
    next unless defined $key;

    # A key without a count is treated as not yet seen.
    $count{$key} = defined $seen ? $seen : 0;
}

close $stat_fh;

# --- 2. Scan the text on STDIN --------------------------------------------

while (my $line = <STDIN>) {
    # Whitespace (including the line terminator) takes no part in the scan,
    # so a pair may be formed by two bytes separated only by whitespace.
    $line =~ s/\s+//g;

    # First byte of a pair that is waiting for its second byte, or undef
    # when the scanner is between characters.  Reset for every line.
    my $lead;

    # Every byte of the line is visited, including the last one, so a pair
    # that ends the line is counted as well.
    BYTE:
    for my $byte (split //, $line) {
        my $code = unpack('C', $byte);

        if (!defined $lead) {
            # Plain single-byte character: nothing to count.
            next BYTE if $code <= $SINGLE_BYTE_MAX;

            # A high byte that cannot start a counted pair (0x81..0xA0)
            # abandons the rest of this line.
            # NOTE(review): presumably such a byte means the line is not in
            # the expected encoding -- confirm that skipping the remainder
            # (rather than only the following byte) is what is wanted.
            last BYTE if $code <= $PAIR_BYTE_FLOOR;

            $lead = $byte;
            next BYTE;
        }

        # A lead byte followed by anything that is not a valid second byte
        # also abandons the rest of this line.
        last BYTE if $code <= $PAIR_BYTE_FLOOR;

        my $pair = $lead . $byte;
        undef $lead;

        $count{$pair}++;

        # Running progress output, one line per recognised pair.
        print $pair . " " . $count{$pair} . "\n";
    }
}

# --- 3. Dump the complete table -------------------------------------------

for my $key (sort keys %count) {
    print $key . " " . $count{$key} . "\n";
}