Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 #!/usr/local/bin/perl
2 # -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
3 #
4 # This Source Code Form is subject to the terms of the Mozilla Public
5 # License, v. 2.0. If a copy of the MPL was not distributed with this
6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Conversion tables shared by every sub in this script. They are package
# globals on purpose: the subs below read and fill them by name.
#
# Each table starts out as the empty list "()". Assigning "{}" (an anonymous
# hash reference) to a hash instead creates one bogus entry whose key is the
# stringified reference ("HASH(0x...)"), and that entry would later be
# written into the generated table files.
our %gb18030tounicode     = ();   # GB18030 code  -> Unicode (every line read)
our %unicodetogb18030     = ();   # Unicode       -> GB18030 code
our %unicodetocp936       = ();   # Unicode       -> CP936 code
our %cp936tounicode       = ();   # CP936 code    -> Unicode (every line read)
our %tounicodecommon      = ();   # code shared by both charsets -> Unicode
our %gb18030tounicodeuniq = ();   # GB18030-only code, up to 4 hex digits
our %gb180304btounicode   = ();   # GB18030-only code, more than 4 hex digits
our %cp936tounicodeuniq   = ();   # CP936-only code -> Unicode

our %map = ();                    # not referenced by the code in this file

# Number of trail-byte slots per lead byte: 127 for 0x80..0xFE plus 63 for
# 0x40..0x7E.
our $rowwidth = ((0xff - 0x80)+(0x7f - 0x40));
# Convert a two-byte CP936 code, written as a "0x"-prefixed hex string such
# as "0x8140", into a zero-based linear index.
#
# Argument:
#   $cp936 - string whose characters 2-3 are the lead byte and characters
#            4-5 the trail byte, in hex (the first two characters are
#            skipped, not checked).
# Returns:
#   (lead - 0x81) * $rowwidth + column, where trail bytes below 0x80 give
#   column (trail - 0x40) and trail bytes from 0x80 up continue after the
#   first 0x7f - 0x40 columns.
#
# Declared without a prototype: the former empty prototype "()" turned an
# ordinary call such as cp936tonum("0x8140") into a compile-time error even
# though the sub has always needed one argument.
sub cp936tonum
{
    our $rowwidth;

    my ($cp936) = @_;
    my $lead  = hex(substr($cp936, 2, 2));
    my $trail = hex(substr($cp936, 4, 2));

    my $index = ($lead - 0x81) * $rowwidth;
    if ($trail >= 0x80) {
        $index += $trail - 0x80 + (0x7f - 0x40);
    }
    else {
        $index += $trail - 0x40;
    }
    return $index;
}
# Add the end-user-defined-character mappings to %unicodetocp936.
#
# Consecutive code points starting at U+E000 are assigned, in this order, to
#   AAA1-AFFE, then F8A1-FEFE, then A140-A7A0 (trail byte 7F is skipped).
# Existing entries for those code points are overwritten.
#
# The running code point is a lexical here; it used to be the package
# global $u, which this sub silently clobbered.
sub addeudc
{
    our %unicodetocp936;

    # Each area: first lead byte, last lead byte, list of [low, high]
    # trail-byte ranges visited in order for every lead byte.
    my @areas = (
        [ 0xAA, 0xAF, [ [ 0xA1, 0xFE ] ] ],                  # AAA1-AFFE
        [ 0xF8, 0xFE, [ [ 0xA1, 0xFE ] ] ],                  # F8A1-FEFE
        [ 0xA1, 0xA7, [ [ 0x40, 0x7E ], [ 0x80, 0xA0 ] ] ],  # A140-A7A0
    );

    my $codepoint = 0xE000;
    foreach my $area (@areas) {
        my ($firstlead, $lastlead, $trailranges) = @$area;
        foreach my $lead ($firstlead .. $lastlead) {
            foreach my $range (@$trailranges) {
                foreach my $trail ($range->[0] .. $range->[1]) {
                    my $unicode = sprintf("%04X", $codepoint);
                    my $code    = sprintf("%02X%02X", $lead, $trail);
                    $unicodetocp936{$unicode} = $code;
                    $codepoint++;
                }
            }
        }
    }
    return;
}
# Load CP936.txt from the current directory into %cp936tounicode and
# %unicodetocp936.
#
# Lines starting with "#" are skipped. Every other line is split on tabs:
# field 1 is the CP936 code and field 2 the Unicode value, both written with
# a "0x" prefix. Lines whose second field does not start with "0x" are
# ignored. Up to four characters after each "0x" are kept as the hex key.
#
# %cp936tounicode records every line. %unicodetocp936 keeps the FIRST code
# seen for a Unicode value; a later duplicate produces a warning on STDERR.
#
# Dies if the file cannot be opened.
sub readcp936
{
    our (%cp936tounicode, %unicodetocp936);

    open(my $in, "<", "CP936.txt") || die "Cannot open CP936 file: $!";
    while (my $line = <$in>) {
        next if $line =~ /^#/;
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp($line);
        my ($gb, $u) = split(/\t/, $line);
        next unless defined($u) && $u =~ /^0x/;

        my $u1  = substr($u, 2, 4);
        my $gb1 = substr($gb, 2, 4);
        $cp936tounicode{$gb1} = $u1;

        # String test on purpose: with "==" a stored code such as "A1A1"
        # numifies to 0, compares equal to "" and would be overwritten.
        if (!defined($unicodetocp936{$u1}) || $unicodetocp936{$u1} eq "") {
            $unicodetocp936{$u1} = $gb1;
        }
        else {
            warn "WARNING: Unicode " . $u1 . " already map to CP936 " .
                 $unicodetocp936{$u1} . " when we try to map to " . $gb1 . "\n";
        }
    }
    close($in);
    return;
}
# Load the file "GB18030" from the current directory into %gb18030tounicode
# and %unicodetogb18030.
#
# Only lines starting with an upper-case hex digit are used. Such a line is
# split on whitespace: field 1 is the Unicode value, field 2 the GB18030
# code (neither carries a "0x" prefix). Lines with no second field are
# skipped.
#
# %gb18030tounicode records every line. %unicodetogb18030 keeps the FIRST
# code seen for a Unicode value; a later duplicate produces a warning on
# STDERR. (The warning used to be a copy of the CP936 one and referred to
# the wrong variables and table.)
#
# Dies if the file cannot be opened.
sub readgb18030
{
    our (%gb18030tounicode, %unicodetogb18030);

    open(my $in, "<", "GB18030") || die "Cannot open GB18030 file: $!";
    while (my $line = <$in>) {
        next unless $line =~ /^[0-9A-F]/;
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp($line);
        # split ' ' tolerates runs of blanks/tabs between the two fields.
        my ($u, $gb) = split(' ', $line);
        next unless defined($gb);

        $gb18030tounicode{$gb} = $u;

        # String test on purpose: with "==" a stored code such as "A1A4"
        # numifies to 0, compares equal to "" and would be overwritten.
        if (!defined($unicodetogb18030{$u}) || $unicodetogb18030{$u} eq "") {
            $unicodetogb18030{$u} = $gb;
        }
        else {
            warn "WARNING: Unicode " . $u . " already map to GB18030 " .
                 $unicodetogb18030{$u} . " when we try to map to " . $gb . "\n";
        }
    }
    close($in);
    return;
}
# Partition the Unicode -> code mappings for U+0000..U+FFFF into the output
# tables:
#
#   %tounicodecommon       code -> Unicode where GB18030 and CP936 agree
#   %gb180304btounicode    GB18030-only codes longer than four hex digits
#   %gb18030tounicodeuniq  GB18030-only codes of at most four hex digits
#   %cp936tounicodeuniq    CP936-only codes
#
# A code point missing from a source table is treated as the empty string;
# code points missing from both tables produce no entry.
sub splittable
{
    our (%unicodetogb18030, %unicodetocp936, %tounicodecommon,
         %gb180304btounicode, %gb18030tounicodeuniq, %cp936tounicodeuniq);

    for (my $i = 0; $i < 0x10000; $i++) {
        my $u = sprintf("%04X", $i);

        my $gb = $unicodetogb18030{$u};
        my $cp = $unicodetocp936{$u};
        $gb = "" unless defined($gb);
        $cp = "" unless defined($cp);

        if ($gb eq $cp) {
            $tounicodecommon{$gb} = $u if $gb ne "";
            next;
        }

        if ($gb ne "") {
            # length($gb), not "$gb.length": the latter concatenates $gb
            # with length($_) and compares the result numerically, which
            # sent two-byte codes such as "8140" to the four-byte table.
            if (length($gb) > 4) {
                $gb180304btounicode{$gb} = $u;
            }
            else {
                $gb18030tounicodeuniq{$gb} = $u;
            }
        }
        if ($cp ne "") {
            $cp936tounicodeuniq{$cp} = $u;
        }
    }
    return;
}
# Convert a four-byte GB18030 code given as eight hex digits (no prefix),
# e.g. "81308130", into its linear index.
#
# Argument:
#   $gb - eight hex digits, two per byte.
# Returns:
#   the index as an upper-case hex string of at least four digits, computed
#   as ((b1 * 10 + b2) * 126 + b3) * 10 + b4 with
#   b1 = byte1 - 0x81, b2 = byte2 - 0x30, b3 = byte3 - 0x81, b4 = byte4 - 0x30.
#
# Declared without a prototype: the former empty prototype "()" only allowed
# calls through the "&" form, although the sub needs one argument.
sub gb4bytestoidx
{
    my ($gb) = @_;

    my $b1 = hex(substr($gb, 0, 2)) - 0x81;
    my $b2 = hex(substr($gb, 2, 2)) - 0x30;
    my $b3 = hex(substr($gb, 4, 2)) - 0x81;
    my $b4 = hex(substr($gb, 6, 2)) - 0x30;

    my $linear = ((($b1 * 10) + $b2) * 126 + $b3) * 10 + $b4;
    return sprintf("%04X", $linear);
}
# Write %tounicodecommon to gbkcommon.txt in the current directory, one
# "0x<code>\t0x<unicode>" line per entry, sorted by code as strings.
#
# Dies if the file cannot be opened or if closing it fails (buffered write
# errors only surface at close).
sub printcommontable
{
    our %tounicodecommon;

    open(my $out, ">", "gbkcommon.txt") || die "cannot open gbkcommon.txt: $!";
    foreach my $gb (sort(keys %tounicodecommon)) {
        print {$out} "0x" . $gb . "\t0x" . $tounicodecommon{$gb} . "\n";
    }
    close($out) || die "cannot close gbkcommon.txt: $!";
    return;
}
# Write %cp936tounicodeuniq to cp936uniq.txt in the current directory, one
# "0x<code>\t0x<unicode>" line per entry, sorted by code as strings.
#
# Dies if the file cannot be opened or if closing it fails (buffered write
# errors only surface at close).
sub printcp936table
{
    our %cp936tounicodeuniq;

    open(my $out, ">", "cp936uniq.txt") || die "cannot open cp936uniq.txt: $!";
    foreach my $gb (sort(keys %cp936tounicodeuniq)) {
        print {$out} "0x" . $gb . "\t0x" . $cp936tounicodeuniq{$gb} . "\n";
    }
    close($out) || die "cannot close cp936uniq.txt: $!";
    return;
}
# Write %gb180304btounicode to gb180304b.txt in the current directory,
# sorted by code as strings. Each line is
#   "0x<linear index>\t0x<unicode>\t# 0x<code>"
# where the linear index comes from gb4bytestoidx(). Entries mapped to
# "FFFF" are left out.
#
# Dies if the file cannot be opened or if closing it fails (buffered write
# errors only surface at close).
sub printgb180304btable
{
    our %gb180304btounicode;

    open(my $out, ">", "gb180304b.txt") || die "cannot open gb180304b.txt: $!";
    foreach my $gb (sort(keys %gb180304btounicode)) {
        next if $gb180304btounicode{$gb} eq "FFFF";
        # The "&" call form is kept deliberately: it works whether or not
        # gb4bytestoidx is declared with an empty prototype.
        print {$out} "0x" . &gb4bytestoidx($gb) . "\t0x" .
                     $gb180304btounicode{$gb} . "\t# 0x" . $gb . "\n";
    }
    close($out) || die "cannot close gb180304b.txt: $!";
    return;
}
# Write %gb18030tounicodeuniq to gb18030uniq.txt in the current directory,
# one "0x<code>\t0x<unicode>" line per entry, sorted by code as strings.
#
# Dies if the file cannot be opened or if closing it fails (buffered write
# errors only surface at close).
sub printgb18030table
{
    our %gb18030tounicodeuniq;

    open(my $out, ">", "gb18030uniq.txt") || die "cannot open gb18030uniq.txt: $!";
    foreach my $gb (sort(keys %gb18030tounicodeuniq)) {
        print {$out} "0x" . $gb . "\t0x" . $gb18030tounicodeuniq{$gb} . "\n";
    }
    close($out) || die "cannot close gb18030uniq.txt: $!";
    return;
}
# Run the external commands that turn the generated text tables into the
# .uf/.ut files and cp936map.h. Each command line is echoed to STDOUT and
# then handed to the shell (the redirections need one).
#
# A command that cannot be started or exits non-zero is reported on STDERR;
# the remaining commands are still run, as before.
sub genufut
{
    my @commands = (
        "umaptable -uf < gb18030uniq.txt > gb18030uniq2b.uf",
        "umaptable -ut < gb18030uniq.txt > gb18030uniq2b.ut",
        "umaptable -uf < cp936uniq.txt > gbkuniq2b.uf",
        "umaptable -ut < cp936uniq.txt > gbkuniq2b.ut",
        "umaptable -uf < gb180304b.txt > gb180304bytes.uf",
        "umaptable -ut < gb180304b.txt > gb180304bytes.ut",
        "perl cp936tocdx.pl > cp936map.h",
    );

    foreach my $command (@commands) {
        print($command . "\n");
        if (system($command) != 0) {
            warn "WARNING: command failed (status $?): $command\n";
        }
    }
    return;
}
# Main sequence: load both source tables, add the user-defined area, split
# the mappings into common/unique tables, write the text tables, then run
# the external generators on them.
readgb18030();
readcp936();
addeudc();
splittable();

printcommontable();
printgb180304btable();
printgb18030table();
printcp936table();

genufut();