intl/chardet/tools/charfreqtostat.pl

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 #!/usr/bin/perl
michael@0 2 #
michael@0 3 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
# GenNPL()
#   Returns the text of the C comment header for the generated output: an
#   editor modeline followed by the MPL 2.0 notice.  Every line of the
#   header, including the last one, is terminated by a newline.
sub GenNPL {
    my @header_lines = (
        '/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */',
        '/* This Source Code Form is subject to the terms of the Mozilla Public',
        '* License, v. 2.0. If a copy of the MPL was not distributed with this',
        '* file, You can obtain one at http://mozilla.org/MPL/2.0/. */',
    );

    return join('', map { "$_\n" } @header_lines);
}
michael@0 16
# Main script.
#
# Reads character-frequency lines from STDIN and writes a C-style brace
# initializer to STDOUT.  Each input line is split on whitespace: the first
# field is the character (its raw bytes), the second is its occurrence count.
# A character is tallied only when both of its first two bytes lie in
# 0xA1..0xFE (presumably the two-byte range of an EUC-style encoding --
# confirm against the data fed to this tool).
#
# The output holds, for the lead byte and then the trail byte: the relative
# frequency of each byte value, the population standard deviation and mean of
# those frequencies, and a weight (that byte's standard deviation divided by
# the sum of both standard deviations).
use strict;
use warnings;

# Byte range tallied for both bytes of a character, and the number of table
# cells that range spans (0xA1..0xFE is 94 values).
use constant FIRST_BYTE => 0xA1;
use constant LAST_BYTE  => 0xFE;
use constant CELL_COUNT => LAST_BYTE - FIRST_BYTE + 1;

# TallyByteCounts($in)
#   Reads "<character> <count>" lines from the filehandle $in.
#   Returns ($total, \@lead, \@trail): the sum of all accepted counts and the
#   per-byte-value counts for the first and second byte.  Both arrays have
#   CELL_COUNT entries, indexed by (byte - FIRST_BYTE), initialised to 0.
sub TallyByteCounts {
    my ($in) = @_;

    my $total = 0;
    my @lead  = (0) x CELL_COUNT;
    my @trail = (0) x CELL_COUNT;

    while (my $line = <$in>) {
        my ($char, $count) = split(/\s+/, $line);
        next unless defined $count;    # no count field: nothing to add

        # Only the first two bytes of the character field are examined.
        my ($hi, $lo) = unpack('C2', $char);
        next unless defined $lo;       # fewer than two bytes

        # The upper bound matters: without it a stray 0xFF byte would add to
        # $total while having no table cell, skewing every frequency.
        next if $hi < FIRST_BYTE || $hi > LAST_BYTE;
        next if $lo < FIRST_BYTE || $lo > LAST_BYTE;

        $total                   += $count;
        $lead[$hi - FIRST_BYTE]  += $count;
        $trail[$lo - FIRST_BYTE] += $count;
    }

    return ($total, \@lead, \@trail);
}

# FreqStats(\@counts, $total)
#   Converts per-cell counts into relative frequencies (count / $total).
#   Returns (\@freq, $mean, $stddev), where $mean and $stddev are the mean and
#   population standard deviation of the frequencies over CELL_COUNT cells.
#   $total must be non-zero.
sub FreqStats {
    my ($counts, $total) = @_;

    my @freq = map { $_ / $total } @{$counts};

    my $mean = 0.0;
    $mean += $_ for @freq;
    $mean /= CELL_COUNT;

    my $variance = 0.0;
    for my $value (@freq) {
        my $delta = $value - $mean;
        $variance += $delta * $delta;
    }
    $variance /= CELL_COUNT;

    return (\@freq, $mean, sqrt($variance));
}

# PrintFreqTable($label, \@freq)
#   Prints one brace-enclosed frequency table to STDOUT, one value per line,
#   each annotated with "$label[<byte in hex>]".  Every row but the last is
#   followed by a comma.
sub PrintFreqTable {
    my ($label, $freq) = @_;
    my $last_cell = $#{$freq};

    print " {\n";
    for my $cell (0 .. $last_cell) {
        my $comma = $cell == $last_cell ? '' : ',';
        printf(" %.6ff%s // %s[%2x]\n",
               $freq->[$cell], $comma, $label, $cell + FIRST_BYTE);
    }
    print " },\n";
}

# The character field is inspected byte by byte, so read STDIN as raw bytes.
binmode(STDIN);
my ($total, $lead_counts, $trail_counts) = TallyByteCounts(\*STDIN);

# Every frequency is a fraction of $total; fail with a clear message (and
# before any output is written) instead of dividing by zero.
die "charfreqtostat: no characters with both bytes in 0xA1-0xFE and a "
  . "non-zero count were read from STDIN\n"
    unless $total > 0;

my ($lead_freq,  $lead_mean,  $lead_std)  = FreqStats($lead_counts,  $total);
my ($trail_freq, $trail_mean, $trail_std) = FreqStats($trail_counts, $total);

# The weights split 1.0 in proportion to the two standard deviations.  When
# both are zero the ratio is undefined, so fall back to an even split.
my $spread       = $lead_std + $trail_std;
my $lead_weight  = $spread > 0 ? $lead_std  / $spread : 0.5;
my $trail_weight = $spread > 0 ? $trail_std / $spread : 0.5;

print GenNPL();
print "{\n";
PrintFreqTable('FreqH', $lead_freq);
printf("%.6ff, // Lead Byte StdDev\n", $lead_std);
printf("%.6ff, // Lead Byte Mean\n",   $lead_mean);
printf("%.6ff, // Lead Byte Weight\n", $lead_weight);
PrintFreqTable('FreqL', $trail_freq);
printf("%.6ff, // Trail Byte StdDev\n", $trail_std);
printf("%.6ff, // Trail Byte Mean\n",   $trail_mean);
printf("%.6ff // Trail Byte Weight\n",  $trail_weight);
print "};\n";

mercurial