intl/uconv/tools/parse-mozilla-encoding-table.pl

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

#!/usr/bin/perl
# parse-mozilla-encoding-table.pl, version 0.6
#
# Script to disassemble existing Mozilla *.uf or *.ut files
# back to source conversion tables.
# by Anthony Fok <anthony@thizlinux.com>, ThizLinux Laboratory Ltd., 2002/11/27
# License: GNU General Public License, version 2 or newer
#
# Used for verifying HKSCS-1999 hkscs.uf and hkscs.ut so that I can make
# new ones for HKSCS-2001.  This script is quick-and-dirty and not very
# robust, so if the debug output of fromu/tou ever changes, this script
# will need to be modified too. :-)
#
# Usage: parse-mozilla-encoding-table.pl <file.ut | file.uf>
#
# The input is read as text made of items of the form
#
#   Begin of Item <hex id>
#   ... Format <0|1|2>
#   ... srcBegin = <hex>
#   ... (format-specific lines, see below)
#   End of Item <hex id>
#
# Lines outside of items are ignored.  The file name is echoed first, then
# one "0x....<TAB>0x...." line is printed per table entry.  Any structural
# problem in the input aborts the script with a message on STDERR.

use strict;
use warnings;

my %data       = ();    # source code (hex string) => destination code (hex string)
my $mappingPos = 0;     # running count of Format 1 mapping entries seen so far

my $filename = shift;
die "Usage: $0 <file.ut | file.uf>\n" unless defined $filename;

# The file extension selects the column order of the generated table.
my $mode;
if ($filename =~ /\.(ut|uf)$/) {
    print $filename, "\n";
    $mode = $1;
} else {
    die "$0: '$filename' does not end in .ut or .uf\n";
}

# Three-argument open so that the file name can never be taken as a mode.
open(my $in, '<', $filename) or die "$0: cannot open '$filename': $!\n";

# Read the next line from $fh and match it against $pattern.
# Returns the first capture group (undef if the pattern has none).
# Dies if the file ends or the line does not match; $what names the
# expected content for the error message.
sub expect_line {
    my ($fh, $pattern, $what) = @_;
    my $line = <$fh>;
    defined $line
        or die "$filename: unexpected end of file, expected $what\n";
    $line =~ $pattern
        or die "$filename:$.: expected $what\n";
    return $1;
}

# Quick-and-dirty routine to populate %data
my $itemId;
while (my $line = <$in>) {
    next unless $line =~ /^Begin of Item ([[:xdigit:]]+)/;
    my $newId = $1;

    # Item ids must be consecutive.
    die "$filename:$.: item $newId does not follow item $itemId\n"
        if defined($itemId) and hex($itemId) + 1 != hex($newId);
    $itemId = $newId;

    my $format    = expect_line($in, qr/Format ([012])/, "'Format <0|1|2>'");
    my $srcBegin  = expect_line($in, qr/srcBegin = ([[:xdigit:]]+)/,
                                "'srcBegin = <hex>'");
    my $endOfItem = qr/^End of Item \Q$itemId\E\s*$/;
    my $endLabel  = "'End of Item $itemId'";

    if ($format == 0) {    # Range
        my $srcEnd    = expect_line($in, qr/srcEnd = ([[:xdigit:]]+)/,
                                    "'srcEnd = <hex>'");
        my $destBegin = expect_line($in, qr/destBegin = ([[:xdigit:]]+)/,
                                    "'destBegin = <hex>'");

        # srcBegin..srcEnd maps linearly onto destBegin, destBegin+1, ...
        my $first = hex($srcBegin);
        for my $src ($first .. hex($srcEnd)) {
            $data{sprintf("%04X", $src)}
                = sprintf("%04X", hex($destBegin) + $src - $first);
        }

        expect_line($in, $endOfItem, $endLabel);
    }
    elsif ($format == 1) {    # Mapping
        my $srcEnd        = expect_line($in, qr/srcEnd = ([[:xdigit:]]+)/,
                                        "'srcEnd = <hex>'");
        my $mappingOffset = hex(expect_line($in,
                                            qr/mappingOffset = ([[:xdigit:]]+)/,
                                            "'mappingOffset = <hex>'"));

        # Each mapping item must start where the previous one stopped.
        die "$filename:$.: mappingOffset $mappingOffset, expected $mappingPos\n"
            unless $mappingOffset == $mappingPos;

        expect_line($in, qr/Mapping =\s*$/, "'Mapping ='");

        # Consume whitespace-separated values until the "End of Item" line.
        # Hitting end of file here is an error rather than an endless loop.
        my $mapLine;
        while (1) {
            $mapLine = <$in>;
            defined $mapLine
                or die "$filename: unexpected end of file inside the "
                     . "mapping of item $itemId\n";
            last if $mapLine =~ /^End of Item/;

            # split ' ' discards leading whitespace and the trailing
            # newline, so the line needs no chop/chomp first.
            for my $value (split ' ', $mapLine) {
                my $key = sprintf("%04X",
                    hex($srcBegin) - $mappingOffset + $mappingPos++);

                # FFFD entries are counted but not recorded (presumably
                # the "no mapping" placeholder -- confirm against fromu/tou).
                next if $value eq "FFFD";

                if (defined($data{$key})) {
                    print "Error: doubly defined. $key was $data{$key}, and now $value.\n";
                } else {
                    $data{$key} = $value;
                }
            }
        }

        # The number of values read must cover srcBegin..srcEnd exactly.
        die "$filename:$.: item $itemId has "
          . ($mappingPos - $mappingOffset) . " mapping entries, expected "
          . (hex($srcEnd) - hex($srcBegin) + 1) . "\n"
            unless $mappingPos - $mappingOffset
                == hex($srcEnd) - hex($srcBegin) + 1;

        $mapLine =~ $endOfItem
            or die "$filename:$.: expected $endLabel\n";
    }
    else {    # Single ($format == 2)
        my $destBegin = expect_line($in, qr/destBegin = ([[:xdigit:]]+)/,
                                    "'destBegin = <hex>'");
        $data{$srcBegin} = $destBegin;
        expect_line($in, $endOfItem, $endLabel);
    }
}

close($in);

# Generate conversion table
# Keys are ordered by numeric value; for the usual four-digit upper-case
# keys this is the same order a plain string sort produces.
for my $key (sort { hex($a) <=> hex($b) or $a cmp $b } keys %data) {
    if ($mode eq "ut") {
        print "0x$key\t0x$data{$key}\n";
    } else {    # "uf": same pairs with the two columns swapped
        print "0x$data{$key}\t0x$key\n";
    }
}

mercurial