#!/usr/local/bin/perl
use strict;

# Names of the mapping files read so far.  The reader subs append to this
# list and the main script interpolates it into the "generated by" banner
# of the output file.
my @source_files = ();

# Lead-byte table, indexed by the high byte of a Shift_JIS code:
#   -1        the high byte is zero, i.e. the code is a single byte
#   -2        lead bytes 0xF0..0xF9, reported as 'private'
#   positive  row base that is added to the trail-byte value
#   undef     every other lead byte (not usable)
my @sjis_h;
$sjis_h[0x00] = -1;
$sjis_h[ 0x81 + $_ ] = 0x2100 + 0x200 * $_ for 0 .. 30;
$sjis_h[ 0xe0 + $_ ] = 0x5F00 + 0x200 * $_ for 0 .. 15;
$sjis_h[$_] = -2 for 0xf0 .. 0xf9;

# Trail-byte table, indexed by the low byte of a Shift_JIS code.  Values of
# 0x121 and above carry into the high byte when added to the row base, which
# selects the second of the two rows that share a lead byte.
my @sjis_l;
$sjis_l[ 0x40 + $_ ] = 0x21 + $_  for 0 .. 0x3e;    # 0x40..0x7e -> 0x21..0x5f
$sjis_l[ 0x80 + $_ ] = 0x60 + $_  for 0 .. 0x1e;    # 0x80..0x9e -> 0x60..0x7e
$sjis_l[ 0x9f + $_ ] = 0x121 + $_ for 0 .. 0x5d;    # 0x9f..0xfc -> 0x121..0x17e

# Classify a Shift_JIS code and, where the tables allow it, convert it.
#
# Argument:
#   $s - Shift_JIS code as an integer (single byte, or lead byte << 8 | trail)
#
# Returns the list ($code, $type):
#   'jis0201'       single byte 0x00..0x7f or 0xa1..0xdf; $code is $s
#   'sjis1undef'    any other single byte; $code is $s
#   'private'       lead byte 0xf0..0xf9; $code is $s
#   'sjis2undef'    unusable lead or trail byte; $code is $s
#   'jis0208'       converted code below 0x2900 or in 0x3000..0x74ff
#   'jis0208undef'  any other converted code
# For the last two, $code is the converted value (row base + trail value).
sub sjis_to_jis {
    my ($s) = @_;

    my $row_base = $sjis_h[ ( $s >> 8 ) & 0xff ];

    # Single byte: roman and kana ranges are jis0201, the rest is undefined.
    if ( $row_base == -1 ) {
        my $is_roman = $s <= 0x7f;
        my $is_kana  = $s >= 0xa1 && $s <= 0xdf;
        return ( $s, ( $is_roman || $is_kana ) ? 'jis0201' : 'sjis1undef' );
    }

    return ( $s, 'private' )    if $row_base == -2;
    return ( $s, 'sjis2undef' ) unless $row_base > 0;

    # A missing trail entry compares equal to zero.
    my $trail = $sjis_l[ $s & 0xff ];
    return ( $s, 'sjis2undef' ) if $trail == 0;

    my $jis          = $row_base + $trail;
    my $is_kanji     = $jis >= 0x3000 && $jis < 0x7500;
    my $is_non_kanji = $jis < 0x2900;
    return ( $jis, ( $is_kanji || $is_non_kanji ) ? 'jis0208' : 'jis0208undef' );
}

# Read a Shift_JIS-to-Unicode mapping file and bucket its entries by type.
#
# Arguments:
#   $filename - path of a mapping file with whitespace-separated columns
#   $s_col    - zero-based index of the column holding the Shift_JIS code
#   $u_col    - zero-based index of the column holding the Unicode code point
#
# Both columns must be hex literals of the form 0xNNNN; any line where either
# one is missing or has another form is skipped.
#
# Returns a hash (as a flat key/value list) keyed by the type string that
# sjis_to_jis() reports.  Each value is a reference to a list of
# [ $code, $sjis, $unicode ] triples in file order, where $code is the first
# value returned by sjis_to_jis().
#
# Side effect: appends $filename to @source_files.
# Dies, naming the file, when it cannot be opened.
sub read_sjis_map {
    my ($filename, $s_col, $u_col) = @_;
    my %map;

    # Lexical handle and three-argument open: the file name is never parsed
    # for a mode, and the global MAP handle used for output is left alone.
    open my $in, '<', $filename or die "$filename: $!";
    while ( my $line = <$in> ) {
        my @cols = split /\s+/, $line;
        my ($s, $u) = @cols[ $s_col, $u_col ];
        next unless defined $s && defined $u;
        next unless $s =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/;

        # oct() understands the 0x prefix.
        $s = oct($s);
        $u = oct($u);

        my ($j, $type) = sjis_to_jis($s);
        push @{ $map{$type} }, [ $j, $s, $u ];
    }
    close $in or warn "$filename: $!";

    push @source_files, $filename;
    return %map;
}

# Read a JIS X 0212-to-Unicode mapping file.
#
# Arguments:
#   $filename - path of a mapping file with whitespace-separated columns
#   $j_col    - zero-based index of the column holding the JIS code
#   $u_col    - zero-based index of the column holding the Unicode code point
#
# Both columns must be hex literals of the form 0xNNNN; any line where either
# one is missing or has another form is skipped.
#
# Returns a reference to a list of [ $jis, 0, $unicode ] triples in file
# order (the middle slot, used for the Shift_JIS code elsewhere, is always 0).
# The list is empty, never undef, when the file has no usable lines.
#
# Side effect: appends $filename to @source_files.
# Dies, naming the file, when it cannot be opened.
sub read_0212_map {
    my ($filename, $j_col, $u_col) = @_;
    my $map = [];

    # Lexical handle and three-argument open: the file name is never parsed
    # for a mode, and the global MAP handle used for output is left alone.
    open my $in, '<', $filename or die "$filename: $!";
    while ( my $line = <$in> ) {
        my @cols = split /\s+/, $line;
        my ($j, $u) = @cols[ $j_col, $u_col ];
        next unless defined $j && defined $u;
        next unless $j =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/;

        # oct() understands the 0x prefix.
        $j = oct($j);
        $u = oct($u);

        # An entry that maps to U+007E is redirected to U+FF5E.
        $u = 0xff5e if $u == 0x007e;

        push @$map, [ $j, 0, $u ];
    }
    close $in or warn "$filename: $!";

    push @source_files, $filename;
    return $map;
}

# Unicode code points already written, mapped to the code each one was
# written with.  It persists across write_fromu_map() calls; the main script
# empties it whenever a new duplicate-tracking pass should begin.
my %printed;

# Write a two-column mapping file: "code<TAB>Unicode", one line per Unicode
# code point that has not been written since %printed was last cleared.
#
# Arguments:
#   $filename - output path (created or truncated)
#   $code     - 'sjis' writes the Shift_JIS code of each entry; any other
#               value writes the first element of the triple
#   @maps     - references to lists of [ $j, $s, $u ] triples; undefined
#               entries are treated as empty lists
#
# Codes below 0x100 are written with two hex digits, all others with at
# least four.  When a code point was already written with a different code,
# a "conflict" line is printed to standard output and the entry is dropped.
#
# Dies, naming the file, when it cannot be opened.
sub write_fromu_map {
    my ($filename, $code, @maps) = @_;

    # Lexical handle and three-argument open: the mode is not spliced into
    # the file name, and the global MAP handle is left alone.
    open my $out, '>', $filename or die "$filename: $!";

    foreach my $map (@maps) {
        foreach my $pair ( @{ $map || [] } ) {
            my ($j, $s, $u) = @$pair;
            my $from = $code eq 'sjis' ? $s : $j;

            if ( defined $printed{$u} ) {
                # Codes are integers, so compare them numerically.
                if ( $printed{$u} != $from ) {
                    printf "conflict 0x%04x to 0x%04x, 0x%04x\n",
                        $u, $printed{$u}, $from;
                }
                next;
            }

            my $format = $from < 0x100 ? "0x%02X\t0x%04X\n" : "0x%04X\t0x%04X\n";
            printf {$out} $format, $from, $u;
            $printed{$u} = $from;
        }
    }

    close $out or warn "$filename: $!";
}

# Pool of 94-cell rows shared by every index table.
#   @table            - rows, stored at the offset handed out for them
#   %table            - comma-joined cell values -> offset of the stored row
#   $table_next_count - next free offset; grows by 94 per stored row
my @table;
my %table;
my $table_next_count = 0;

# Build the lookup key for cells $first..$last of the list referenced by
# $cells.  Missing cells count as 0.
sub _table_key {
    my ($cells, $first, $last) = @_;
    return join ',', map { int( $cells->[$_] ) } $first .. $last;
}

# Return the pool offset of a 94-cell row, storing the row first if no row
# with the same cell values is known yet.  An undefined argument is treated
# as an empty row.
sub get_94table_index {
    my ($map_table) = @_;
    $map_table ||= [];

    my $key = _table_key( $map_table, 0, 93 );
    return $table{$key} if defined $table{$key};

    my $offset = $table_next_count;
    $table_next_count += 94;
    $table[$offset] = $map_table;
    $table{$key}    = $offset;
    return $offset;
}

# Return the pool offset of a 188-cell row pair, storing both halves
# back-to-back first if the pair is not known yet.  Each half is also made
# available to get_94table_index() unless an equal row is already known.
# An undefined argument is treated as an empty pair.
sub get_188table_index {
    my ($map_table) = @_;
    $map_table ||= [];

    my $pair_key = _table_key( $map_table, 0, 187 );
    return $table{$pair_key} if defined $table{$pair_key};

    my @halves = (
        [ @{$map_table}[ 0 .. 93 ] ],
        [ @{$map_table}[ 94 .. 187 ] ],
    );

    my $offset = $table_next_count;
    $table_next_count += 188;
    $table{$pair_key} = $offset;

    foreach my $n ( 0, 1 ) {
        my $half_offset = $offset + 94 * $n;
        $table[$half_offset] = $halves[$n];

        my $half_key = _table_key( $halves[$n], 0, 93 );
        $table{$half_key} = $half_offset unless defined $table{$half_key};
    }
    return $offset;
}

# Seed the pool so that an all-empty row (or row pair) is always offset 0.
get_188table_index([]);

# Print the 128 index entries for bytes 0x80..0xFF to the MAP filehandle,
# eight per line.
#
# Arguments: references to lists of [ $j, $s, $u ] triples.  Entries are
# grouped by the high byte of $s; trail bytes 0x40..0x7E and 0x80..0xFC are
# packed into cells 0..187 and entries with any other trail byte are ignored.
# "conflict!" goes to standard output when a cell is assigned two different
# values.
#
# Entry printed per byte:
#   0x80          0xFFFD
#   0xA0          0xF8F0
#   0xA1..0xDF    byte + 0xFEC0
#   0xF0..0xF9    0xE000 + 188 * (byte - 0xF0)
#   0xFD..0xFF    0xF8F1..0xF8F3
#   anything else the offset returned by get_188table_index() for the cells
#                 collected under that byte
sub print_sjis_table_index {
    my @maps = @_;

    my %rows;
    foreach my $map (@maps) {
        foreach my $entry (@$map) {
            my ( undef, $sjis, $ucs ) = @$entry;
            my $lead  = $sjis >> 8;
            my $trail = $sjis & 0xff;

            my $slot;
            if ( $trail >= 0x40 && $trail <= 0x7e ) {
                $slot = $trail - 0x40;
            }
            elsif ( $trail >= 0x80 && $trail <= 0xfc ) {
                $slot = $trail - 0x41;    # closes the gap left by 0x7f
            }
            else {
                next;
            }

            my $seen = $rows{$lead}->[$slot];
            print "conflict!\n" if defined $seen && $seen != $ucs;
            $rows{$lead}->[$slot] = $ucs;
        }
    }

    foreach my $byte ( 0x80 .. 0xff ) {
        print MAP "\n " unless $byte & 0x7;

        if ( $byte == 0x80 ) {
            print MAP " 0xFFFD,";
        }
        elsif ( $byte == 0xa0 ) {
            print MAP " 0xF8F0,";
        }
        elsif ( $byte >= 0xa1 && $byte <= 0xdf ) {
            printf MAP " 0x%04X,", $byte + 0xfec0;
        }
        elsif ( $byte >= 0xf0 && $byte <= 0xf9 ) {
            printf MAP " 0x%04X,", 0xe000 + ( $byte - 0xf0 ) * 188;
        }
        elsif ( $byte >= 0xfd ) {
            printf MAP " 0x%04X,", $byte + ( 0xf8f1 - 0xfd );
        }
        else {
            my $offset = get_188table_index( $rows{$byte} );
            printf MAP " %6d,", $offset;
        }
    }
}

# Print the 128 index entries for row bytes 0x00..0x7F to the MAP
# filehandle, eight per line.
#
# Arguments: references to lists of [ $j, $s, $u ] triples.  Entries are
# grouped by the high byte of $j, and the low byte minus 0x21 selects the
# cell.  "conflict!" goes to standard output when a cell is assigned two
# different values.
#
# Rows 0x21..0x7E print the offset returned by get_94table_index() for the
# cells collected under that row; every other row prints 0xFFFD.
sub print_jis_table_index {
    my @maps = @_;

    my %rows;
    foreach my $map (@maps) {
        foreach my $entry (@$map) {
            my $jis  = $entry->[0];
            my $ucs  = $entry->[2];
            my $row  = $jis >> 8;
            my $slot = ( $jis & 0xff ) - 0x21;

            my $seen = $rows{$row}->[$slot];
            print "conflict!\n" if defined $seen && $seen != $ucs;
            $rows{$row}->[$slot] = $ucs;
        }
    }

    foreach my $row ( 0x00 .. 0x7f ) {
        print MAP "\n " unless $row & 0x7;

        if ( $row < 0x21 || $row > 0x7e ) {
            print MAP " 0xFFFD,";
            next;
        }

        my $offset = get_94table_index( $rows{$row} );
        printf MAP " %6d,", $offset;
    }
}

# Print the index arrays for one Shift_JIS source table to the MAP
# filehandle: g<name>IndexShiftJis, g<name>IndexJis0208, and the
# two-element pointer array g<name>Index that refers to both.
#
# Arguments:
#   $map_name - name fragment inserted into the array identifiers
#   @maps     - references to lists of [ $j, $s, $u ] triples, handed to
#               both index printers
sub print_table_index {
    my ($map_name, @maps) = @_;
    my $prefix = "g${map_name}";

    print MAP "static const uint16_t ${prefix}IndexShiftJis[] = {";
    print_sjis_table_index(@maps);
    print MAP "\n};\n";

    print MAP "static const uint16_t ${prefix}IndexJis0208[] = {";
    print_jis_table_index(@maps);
    print MAP "\n};\n";

    print MAP "static const uint16_t * const ${prefix}Index[] = {"
        . "\n ${prefix}IndexShiftJis, ${prefix}IndexJis0208"
        . "\n};\n\n";
}

# Print the single index array g<name>Index for a JIS-coded table to the
# MAP filehandle.
#
# Arguments:
#   $map_name - name fragment inserted into the array identifier
#   @maps     - references to lists of [ $j, $s, $u ] triples, handed to
#               print_jis_table_index()
sub print_0212_table_index {
    my ($map_name, @maps) = @_;
    my $array_name = "g${map_name}Index";

    print MAP "static const uint16_t ${array_name}[] = {";
    print_jis_table_index(@maps);
    print MAP "\n};\n\n";
}

# Print the gJapaneseMap array to the MAP filehandle: every pooled row in
# offset order, each preceded by an "index N" comment giving its offset.
# Cells that are missing or zero are printed as 0xFFFD.
sub print_table {
    print MAP "static const uint16_t gJapaneseMap[] = {";

    my $offset = 0;
    while ( $offset < $table_next_count ) {
        print MAP "\n /* index $offset */\n ";

        my $cells = $table[$offset];
        foreach my $pos ( 0 .. 93 ) {
            my $ucs = $cells->[$pos];
            $ucs = 0xfffd if $ucs == 0;
            printf MAP " 0x%04X,", $ucs;

            # The first line of a row holds seven values, later lines eight.
            print MAP "\n " if ( $pos & 7 ) == 6;
        }

        $offset += 94;
    }

    print MAP "\n};\n";
}

# ---- main script ----

# Load the three source tables; column 0 holds the legacy code and column 1
# the Unicode code point.
my %cp932   = read_sjis_map( 'CP932.TXT',   0, 1 );
my %ibm     = read_sjis_map( 'IBM943.TXT',  0, 1 );
my $jis0212 = read_0212_map( 'JIS0212.TXT', 0, 1 );

my @sources     = ( \%cp932, \%ibm );
my @sjis_kinds  = qw(jis0201 jis0208 jis0208undef sjis1undef sjis2undef);
my @table_kinds = qw(jis0208 jis0208undef sjis2undef);

# The jis0201 and jis0208 files share one duplicate-tracking pass.
%printed = ();
write_fromu_map( 'jis0201-uf-unify', 'jis', map { $_->{jis0201} } @sources );
write_fromu_map( 'jis0208-uf-unify', 'jis', map { $_->{jis0208} } @sources );

%printed = ();
write_fromu_map( 'jis0208ext-uf-unify', 'jis',
    map { $_->{jis0208undef} } @sources );

%printed = ();
write_fromu_map( 'sjis-uf-unify', 'sjis',
    map { @{$_}{@sjis_kinds} } @sources );

# Everything below writes through the global MAP handle that the print_*
# subs use.
open MAP, ">japanese.map" or die $!;
binmode MAP;

# Copy the header template from the DATA section, up to the "!" line.
while (<DATA>) {
    last if /^!/;
    print MAP;
}
print MAP "/* generated by jamap.pl @source_files */\n\n";
print MAP <<EOM;
// IE-compatible handling of undefined codepoints:
// 0x80 --> U+0080
// 0xa0 --> U+F8F0
// 0xfd --> U+F8F1
// 0xfe --> U+F8F2
// 0xff --> U+F8F3
EOM

print_table_index( 'CP932',  @cp932{@table_kinds} );
print_table_index( 'IBM943', @ibm{@table_kinds} );
print_0212_table_index( 'JIS0212', $jis0212 );
print_table();

close MAP or warn $!;

__DATA__
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

!