#!/usr/bin/perl
# parse-mozilla-encoding-table.pl, version 0.6
# (intl/uconv/tools/parse-mozilla-encoding-table.pl)
#
# Script to disassemble existing Mozilla *.uf or *.ut files
# back to source conversion tables.
# by Anthony Fok <anthony@thizlinux.com>, ThizLinux Laboratory Ltd., 2002/11/27
# License: GNU General Public License, version 2 or newer
#
# Used for verifying HKSCS-1999 hkscs.uf and hkscs.ut so that I can make
# new ones for HKSCS-2001.  This script is quick-and-dirty and not very
# robust, so if the debug output of fromu/tou ever changes, this script
# will need to be modified too. :-)
#
# Usage: parse-mozilla-encoding-table.pl <table.ut | table.uf>
#
# The input is scanned for items of the form
#
#     Begin of Item <hex id>
#      Format <0|1|2>
#       srcBegin = <hex>
#       ...format-specific lines...
#     End of Item <hex id>
#
# where the format-specific lines are
#     format 0 (range):   srcEnd, destBegin
#     format 1 (mapping): srcEnd, mappingOffset, "Mapping =", then rows of
#                         whitespace-separated hex values (FFFD = unmapped)
#     format 2 (single):  destBegin
#
# Output (stdout): the file name, then one "0x<a>\t0x<b>" line per mapping,
# sorted by source code.  For *.ut the source code is printed first, for
# *.uf the destination code is printed first.

use strict;
use warnings;

my $filename = shift;
defined $filename
    or die "Usage: $0 <table.ut|table.uf>\n";

# The file extension selects the column order of the generated table.
my $mode;
if ($filename =~ /\.(ut|uf)$/) {
    print $filename, "\n";
    $mode = $1;
}
else {
    die "$0: '$filename' does not end in .ut or .uf\n";
}

open(my $in, '<', $filename)
    or die "$0: cannot open '$filename': $!\n";

# Read the next line from $fh and match it against $pattern.
#   $fh      - input file handle
#   $pattern - compiled regex the line must match
#   $what    - human-readable description used in the error message
# Returns the first capture group (or 1 if the pattern has no groups).
# Dies on end of file or when the line does not match.
sub expect_line {
    my ($fh, $pattern, $what) = @_;

    my $line = <$fh>;
    defined $line
        or die "$0: $filename: unexpected end of file, expected $what\n";

    # In list context a successful match yields its captures, or (1) when
    # there are none; a failed match yields the empty list.
    my @captures = $line =~ $pattern
        or die "$0: $filename, line " . $. . ": expected $what\n";

    return $captures[0];
}

my %data;              # source code (hex string) => destination code (hex string)
my $mappingPos = 0;    # number of format-1 mapping entries consumed so far
my $itemId;            # id of the most recently seen item

# Quick-and-dirty routine to populate %data
while (my $line = <$in>) {
    next unless $line =~ /^Begin of Item ([[:xdigit:]]+)/;
    my $id = $1;

    # Items must be numbered consecutively.
    die "$0: $filename, line " . $. . ": item $id does not follow item $itemId\n"
        if defined $itemId and hex($itemId) + 1 != hex($id);
    $itemId = $id;

    my $format = expect_line($in, qr/Format ([012])/, 'Format 0, 1 or 2');
    my $srcBegin =
        hex(expect_line($in, qr/srcBegin = ([[:xdigit:]]+)/, 'srcBegin'));
    my $endOfItem     = qr/^End of Item \Q$itemId\E\s*$/;
    my $endOfItemDesc = "'End of Item $itemId'";

    if ($format == 0) {    # Range
        my $srcEnd =
            hex(expect_line($in, qr/srcEnd = ([[:xdigit:]]+)/, 'srcEnd'));
        my $destBegin =
            hex(expect_line($in, qr/destBegin = ([[:xdigit:]]+)/, 'destBegin'));

        # srcBegin..srcEnd maps linearly onto destBegin..
        for my $src ($srcBegin .. $srcEnd) {
            $data{ sprintf('%04X', $src) } =
                sprintf('%04X', $destBegin + $src - $srcBegin);
        }

        expect_line($in, $endOfItem, $endOfItemDesc);
    }
    elsif ($format == 1) {    # Mapping
        my $srcEnd =
            hex(expect_line($in, qr/srcEnd = ([[:xdigit:]]+)/, 'srcEnd'));
        my $mappingOffset =
            hex(expect_line($in, qr/mappingOffset = ([[:xdigit:]]+)/,
                            'mappingOffset'));

        # Each mapping item must start where the previous one stopped.
        $mappingOffset == $mappingPos
            or die "$0: $filename, line " . $.
                 . ": mappingOffset $mappingOffset, expected $mappingPos\n";

        expect_line($in, qr/Mapping =\s*$/, "'Mapping ='");

        my $row;
        while (1) {
            $row = <$in>;

            # Stop at end of file instead of looping forever on a
            # truncated dump.
            defined $row
                or die "$0: $filename: unexpected end of file inside the "
                     . "mapping of item $itemId\n";
            last if $row =~ /^End of Item/;

            # split ' ' ignores leading/trailing whitespace (including the
            # newline), so no chop/chomp is needed.
            for my $dest (split ' ', $row) {
                my $key = sprintf('%04X',
                                  $srcBegin - $mappingOffset + $mappingPos++);
                next if $dest eq "FFFD";    # FFFD marks an unmapped slot
                if (defined($data{$key})) {
                    print "Error: doubly defined. $key was $data{$key}, and now $dest.\n";
                }
                else {
                    $data{$key} = $dest;
                }
            }
        }

        # The number of entries read must cover srcBegin..srcEnd exactly.
        $mappingPos - $mappingOffset == $srcEnd - $srcBegin + 1
            or die "$0: $filename, line " . $.
                 . ": item $itemId has the wrong number of mapping entries\n";
        $row =~ $endOfItem
            or die "$0: $filename, line " . $. . ": expected $endOfItemDesc\n";
    }
    else {    # Single ($format == 2)
        my $destBegin =
            hex(expect_line($in, qr/destBegin = ([[:xdigit:]]+)/, 'destBegin'));

        # Normalised like the range format so that all keys sort consistently.
        $data{ sprintf('%04X', $srcBegin) } = sprintf('%04X', $destBegin);

        expect_line($in, $endOfItem, $endOfItemDesc);
    }
}

close($in) or die "$0: error closing '$filename': $!\n";

# Generate conversion table
for my $key (sort keys %data) {
    if ($mode eq "ut") {
        print "0x$key\t0x$data{$key}\n";
    }
    else {    # $mode eq "uf": destination column first
        print "0x$data{$key}\t0x$key\n";
    }
}