#!/usr/bin/perl
# parse-mozilla-encoding-table.pl, version 0.6
#
# Script to disassemble existing Mozilla *.uf or *.ut files
# back to source conversion tables.
# by Anthony Fok <anthony@thizlinux.com>, ThizLinux Laboratory Ltd., 2002/11/27
# License: GNU General Public License, version 2 or newer
#
# Used for verifying HKSCS-1999 hkscs.uf and hkscs.ut so that I can make
# new ones for HKSCS-2001.  This script is quick-and-dirty and not very
# robust, so if the debug output of fromu/tou ever changes, this script
# will need to be modified too. :-)
#
# Usage: parse-mozilla-encoding-table.pl TABLE.ut
#        parse-mozilla-encoding-table.pl TABLE.uf
#
# The input is scanned for items of the following shape (one field per
# line, all numbers in hexadecimal); anything outside an item is ignored:
#
#   Begin of Item <id>        ids must be consecutive
#   Format <0|1|2>
#   srcBegin = <hex>
#   ... format-specific fields, see below ...
#   End of Item <id>
#
#   Format 0 (range):   srcEnd, destBegin
#   Format 1 (mapping): srcEnd, mappingOffset, "Mapping =", then lines of
#                       whitespace-separated values (FFFD = unmapped)
#   Format 2 (single):  destBegin
#
# Output: the file name, then one "0xSRC<TAB>0xDEST" line per mapping for
# *.ut input, or "0xDEST<TAB>0xSRC" for *.uf input, ordered by SRC.

use strict;
use warnings;

my $filename = shift;

# The extension selects the output column order, so it is mandatory.
my ($mode) = defined($filename) ? ($filename =~ /\.(ut|uf)$/) : ();
die "Usage: $0 TABLE.ut|TABLE.uf\n" unless defined $mode;
print $filename, "\n";

# Three-argument open: the file name can never be taken as a mode or pipe.
open(my $in, '<', $filename) or die "Cannot open $filename: $!\n";

# Read the next line of $fh and match it against $pattern.
# Dies (naming $what) at end of file or when the line does not match.
# Returns the first capture group, or 1 if the pattern has no groups.
sub expect_line {
    my ($fh, $pattern, $what) = @_;

    my $line = <$fh>;
    die "$filename: unexpected end of file, expected $what\n"
        unless defined $line;

    my @captures = $line =~ $pattern
        or die "$filename line $.: expected $what\n";
    return $captures[0];
}

my $HEX = qr/([[:xdigit:]]+)/;

my %data;              # "%04X" source code => destination code (hex text)
my $mappingPos = 0;    # running count of Format 1 mapping entries consumed
my $itemId;            # id (hex text) of the most recent item

# Quick-and-dirty routine to populate %data
while (my $line = <$in>) {
    next unless $line =~ /^Begin of Item $HEX/;
    my $newId = $1;

    # Items must be numbered consecutively; a gap means we lost sync.
    die "$filename line $.: item $newId does not follow item $itemId\n"
        if defined($itemId) and hex($itemId) + 1 != hex($newId);
    $itemId = $newId;

    my $endOfItem = qr/^End of Item \Q$itemId\E\s*$/;
    my $endLabel  = "'End of Item $itemId'";

    my $format   = expect_line($in, qr/Format ([012])/, 'Format 0, 1 or 2');
    my $srcBegin = hex(expect_line($in, qr/srcBegin = $HEX/, 'srcBegin'));

    if ($format == 0) {    # Range
        my $srcEnd    = hex(expect_line($in, qr/srcEnd = $HEX/,    'srcEnd'));
        my $destBegin = hex(expect_line($in, qr/destBegin = $HEX/, 'destBegin'));

        # Every source code in the range maps to destBegin plus its offset.
        for my $src ($srcBegin .. $srcEnd) {
            $data{ sprintf('%04X', $src) }
                = sprintf('%04X', $destBegin + $src - $srcBegin);
        }

        expect_line($in, $endOfItem, $endLabel);
    }
    elsif ($format == 1) {    # Mapping
        my $srcEnd = hex(expect_line($in, qr/srcEnd = $HEX/, 'srcEnd'));
        my $mappingOffset
            = hex(expect_line($in, qr/mappingOffset = $HEX/, 'mappingOffset'));

        # Mapping tables are expected back to back, in item order.
        die sprintf("%s line %d: mappingOffset %04X does not match the "
                  . "running mapping position %04X\n",
                    $filename, $., $mappingOffset, $mappingPos)
            unless $mappingOffset == $mappingPos;

        expect_line($in, qr/Mapping =\s*$/, "'Mapping ='");

        my $entryLine;
        while (1) {
            $entryLine = <$in>;

            # Without this check a truncated file would loop forever.
            die "$filename: unexpected end of file inside the mapping of "
              . "item $itemId\n"
                unless defined $entryLine;
            last if $entryLine =~ /^End of Item/;

            # split ' ' skips leading whitespace and the line terminator.
            for my $dest (split ' ', $entryLine) {
                my $key = sprintf('%04X',
                    $srcBegin - $mappingOffset + $mappingPos++);
                next if $dest eq "FFFD";    # placeholder for "no mapping"
                if (defined $data{$key}) {
                    print "Error: doubly defined. $key was $data{$key}, and now $dest.\n";
                }
                else {
                    $data{$key} = $dest;
                }
            }
        }

        # The table must hold exactly one entry per code in srcBegin..srcEnd.
        my $seen     = $mappingPos - $mappingOffset;
        my $expected = $srcEnd - $srcBegin + 1;
        die "$filename line $.: item $itemId has $seen mapping entries, "
          . "expected $expected\n"
            unless $seen == $expected;

        $entryLine =~ $endOfItem
            or die "$filename line $.: expected $endLabel\n";
    }
    else {    # Single ($format == 2)
        my $destBegin = expect_line($in, qr/destBegin = $HEX/, 'destBegin');

        # Normalise the key like the other formats do, so that sorting and
        # duplicate detection see one spelling per source code.
        $data{ sprintf('%04X', $srcBegin) } = $destBegin;

        expect_line($in, $endOfItem, $endLabel);
    }
}

close $in;

# Generate conversion table, ordered by the numeric value of the source
# code (for four-digit keys this equals plain string order).
my $sourceFirst = $mode eq "ut";
for my $key (sort { hex($a) <=> hex($b) } keys %data) {
    if ($sourceFirst) {
        print "0x$key\t0x$data{$key}\n";
    }
    else {    # "uf": destination column first
        print "0x$data{$key}\t0x$key\n";
    }
}