#!/usr/bin/perl
# parse-mozilla-encoding-table.pl, version 0.6
#
# Script to deassemble existing Mozilla *.uf or *.ut files
# back to source conversion tables.
# by Anthony Fok, ThizLinux Laboratory Ltd., 2002/11/27
# License: GNU General Public License, version 2 or newer
#
# Used for verifying HKSCS-1999 hkscs.uf and hkscs.ut so that I can make
# new ones for HKSCS-2001.  This script is quick-and-dirty and not very
# robust, so if the debug output of fromu/tou ever changes, this script
# will need to be modified too.  :-)
#
# Usage: parse-mozilla-encoding-table.pl <dump.ut | dump.uf>
#
# The input is a sequence of "Begin of Item N" ... "End of Item N" records.
# Each record has a Format line (0 = range, 1 = mapping table, 2 = single
# code point) followed by the fields that format needs.  The script prints
# the file name, then one "0xSRC<TAB>0xDEST" line per mapping for *.ut
# input, or "0xDEST<TAB>0xSRC" for *.uf input, ordered by source code.

use strict;
use warnings;

my $filename = shift;
defined $filename
    or die "Usage: $0 <table.ut|table.uf>\n";

# The extension selects the column order of the generated table.
my $mode;
if ($filename =~ /\.(ut|uf)$/) {
    print $filename, "\n";
    $mode = $1;
}
else {
    die "$filename: expected a file name ending in .ut or .uf\n";
}

# Read the next line of $fh and match it against $re.
#   $fh   - input file handle
#   $re   - compiled pattern the line must match
#   $what - human-readable name of the expected field, for error messages
# Returns the first capture group (or 1 if the pattern has no groups).
# Dies on end of file or when the line does not match.
sub expect_line {
    my ($fh, $re, $what) = @_;

    my $line = <$fh>;
    defined $line
        or die "$filename: unexpected end of file, expected $what\n";

    my @captures = $line =~ $re
        or die "$filename:$.: expected $what, got: $line";
    return $captures[0];
}

open(my $in, '<', $filename) or die "Cannot open $filename: $!\n";

my %data;              # source code (4+ hex digits) => destination code
my $mappingPos = 0;    # running index into the shared mapping table
my $itemId;            # id of the most recently seen item (hex string)

# Quick-and-dirty routine to populate %data
while (my $line = <$in>) {
    next unless $line =~ /^Begin of Item ([[:xdigit:]]+)/;
    my $newId = $1;

    # Items must be numbered consecutively.
    die "$filename:$.: item $newId does not follow item $itemId\n"
        if defined $itemId and hex($itemId) + 1 != hex($newId);
    $itemId = $newId;

    # Whitespace around "=" is matched loosely so that differences in the
    # dump's column padding do not matter.
    my $format   = expect_line($in, qr/Format\s+([012])/, 'Format');
    my $srcBegin = hex expect_line($in, qr/srcBegin\s*=\s*([[:xdigit:]]+)/,
                                   'srcBegin');
    my $endOfItem = qr/^End of Item \Q$itemId\E\s*$/;

    if ($format == 0) {    # Range
        my $srcEnd    = hex expect_line($in, qr/srcEnd\s*=\s*([[:xdigit:]]+)/,
                                        'srcEnd');
        my $destBegin = hex expect_line($in, qr/destBegin\s*=\s*([[:xdigit:]]+)/,
                                        'destBegin');

        # Every source code in the range maps to destBegin plus its offset.
        for my $src ($srcBegin .. $srcEnd) {
            $data{ sprintf('%04X', $src) }
                = sprintf('%04X', $destBegin + $src - $srcBegin);
        }

        expect_line($in, $endOfItem, "End of Item $itemId");
    }
    elsif ($format == 1) {    # Mapping
        my $srcEnd        = hex expect_line($in, qr/srcEnd\s*=\s*([[:xdigit:]]+)/,
                                            'srcEnd');
        my $mappingOffset = hex expect_line($in, qr/mappingOffset\s*=\s*([[:xdigit:]]+)/,
                                            'mappingOffset');

        # Mapping tables are expected to be laid out back to back.
        die "$filename:$.: mappingOffset $mappingOffset, expected $mappingPos\n"
            unless $mappingOffset == $mappingPos;

        expect_line($in, qr/Mapping\s*=\s*$/, "'Mapping ='");

        # Consume rows of destination codes up to the closing line.
        my $closing;
        while (1) {
            my $row = <$in>;
            defined $row
                or die "$filename: unexpected end of file inside the "
                     . "mapping table of item $itemId\n";
            if ($row =~ /^End of Item/) {
                $closing = $row;
                last;
            }

            # split ' ' discards the line ending together with other
            # whitespace, so no chop/chomp is needed.
            for my $dest (split ' ', $row) {
                my $key = sprintf('%04X',
                                  $srcBegin - $mappingOffset + $mappingPos++);
                next if $dest eq "FFFD";    # placeholder for "no mapping"
                if (defined $data{$key}) {
                    print "Error: doubly defined. $key was $data{$key}, and now $dest.\n";
                }
                else {
                    $data{$key} = $dest;
                }
            }
        }

        # The table must hold exactly one entry per source code.
        die "$filename:$.: item $itemId has ", $mappingPos - $mappingOffset,
            " mapping entries, expected ", $srcEnd - $srcBegin + 1, "\n"
            unless $mappingPos - $mappingOffset == $srcEnd - $srcBegin + 1;

        $closing =~ $endOfItem
            or die "$filename:$.: expected End of Item $itemId, got: $closing";
    }
    else {    # Single ($format == 2)
        my $destBegin = hex expect_line($in, qr/destBegin\s*=\s*([[:xdigit:]]+)/,
                                        'destBegin');

        # Normalised the same way as the other formats so that keys compare
        # and sort consistently.
        $data{ sprintf('%04X', $srcBegin) } = sprintf('%04X', $destBegin);

        expect_line($in, $endOfItem, "End of Item $itemId");
    }
}

close($in) or die "Cannot close $filename: $!\n";

# Generate conversion table, ordered by source code.  The numeric
# comparison gives the same order as a string sort for 4-digit keys and
# stays correct if longer codes ever appear.
for my $key (sort { hex($a) <=> hex($b) } keys %data) {
    if ($mode eq "ut") {
        print "0x$key\t0x$data{$key}\n";
    }
    else {    # "uf": destination first
        print "0x$data{$key}\t0x$key\n";
    }
}