|
1 #!/usr/bin/perl |
|
# Identification string written into the generated IBMNEC.map and jis0208.ump:
# script name, the command-line arguments as given, and the edit time stamp.
$ID = "mkjpconv.pl " . "@ARGV" . " (Time-stamp: <2001-08-08 18:54:54 shom>)";
|
3 |
|
4 # This Source Code Form is subject to the terms of the Mozilla Public |
|
5 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
6 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
7 |
|
8 # |
|
9 # based on CP932.TXT from unicode.org |
|
10 # additional information from SHIFTJIS.TXT from unicode.org |
|
11 # |
|
12 # mapping policy: |
|
13 # jis0208 to unicode : based on CP932 |
|
14 # unicode to jis0208 : based on CP932 |
|
15 # the lowest code is used for dual mapping to jis0208 |
|
16 # ascii region : based on ISO8859-1 ( same as CP932 ) IGNORE? |
|
17 # kana region : based on CP932 |
|
18 # IBM Ext(0xFxxx>) : premap to NEC region ( mappable to JIS ) |
|
19 |
|
# At least one command-line argument is required; otherwise print the usage
# line and stop with a failure status.
if ($ARGV[0] eq "") {
    print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]\n";
    exit 1;
}

# Read SHIFTJIS.TXT from the current directory and record every code found
# in its first column in %defined.  Keys have the form "0xHHLL"; for a
# two-digit (single-byte) code the missing high byte is replaced by padding.
# %defined is consulted later to tell plain JIS codes from CP932 extensions.
open(my $sjis_fh, '<', "SHIFTJIS.TXT")
    || die "mkjpconv.pl: cannot open SHIFTJIS.TXT: $!\n";
while (<$sjis_fh>) {
    my ($hi, $lo) = /^0x(..)?(..)\s/;
    if ($lo eq "") { next; }        # not a mapping line (comment, blank, ...)
    if ($hi eq "") { $hi = " " }    # single-byte code: pad the high byte
    $defined{"0x$hi$lo"} = 1;
}
close($sjis_fh);
|
33 |
|
# Drop the first argument.  What remains in @ARGV is the list of mapping
# tables to read; the first of them names every generated file.
shift @ARGV;

$src    = $ARGV[0];
$gendir = $src . ".d";

# Directory that receives the generated tables (IBMNEC.map, jis0208.ump, *.uf).
mkdir($gendir);

# Per-table map files are all named "<kind>-<source table>.map".
{
    my $suffix = "-" . $src . ".map";

    $sufile   = "sjis2ucs"    . $suffix;
    $usfile   = "ucs2sjis"    . $suffix;
    $jufile   = "jis2ucs"     . $suffix;
    $jeufile  = "jisext2ucs"  . $suffix;
    $jaufile  = "jisasc2ucs"  . $suffix;
    $jrkufile = "jiskana2ucs" . $suffix;
    $ujfile   = "ucs2jis"     . $suffix;
    $ujefile  = "ucs2jisext"  . $suffix;
    $ujafile  = "ucs2jisasc"  . $suffix;
    $ujrkfile = "ucs2jiskana" . $suffix;
}

$ibmnecfile  = $gendir . "/IBMNEC.map";
$jdxfile     = $gendir . "/jis0208.ump";
$jdxextfile  = "jis0208ext.ump";
$commentfile = "comment-" . $src . ".txt";
|
55 |
|
# Slurp the license boilerplate from NPL.header (current directory) into
# $NPL; it is written at the top of the generated IBMNEC.map and jis0208.ump.
open(my $npl_fh, '<', "NPL.header")
    || die "mkjpconv.pl: cannot open NPL.header: $!\n";
while (my $line = <$npl_fh>) {
    $NPL .= $line;
}
close($npl_fh);
|
61 |
|
# Read every mapping table named on the command line.
#
# A data line looks like "0xSJIS 0xUCS[+0x...] # comment".  The tables fill:
#   @fromlist     SJIS codes in order of first appearance
#   %fromto       SJIS code -> Unicode code (the first mapping seen wins)
#   %commentbody  SJIS code -> trailing comment text
#   %commentseq   SJIS code -> "+0x..." sequence suffix, if any
#   %tofrom       Unicode code -> comma-separated list of SJIS codes
#
# Only the first table feeds %tofrom.  Every later table is an "another
# check" table: its lines still go through the %fromto bookkeeping (so
# conflicting mappings are reported), but the reverse map is left alone.
foreach my $infile (@ARGV) {

    open(my $in_fh, '<', $infile)
        || die "mkjpconv.pl: cannot open $infile: $!\n";

    while (<$in_fh>) {
        my ($from, $to, $seq, $dum, $comment) =
            /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;
        if ( $seq ne "" ) {
            print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
        }

        # Line did not parse as a mapping (comment, blank, undefined code).
        if ( $from eq "" ) { next; }

        # Single-byte codes get a padded key.
        if ( $from =~ /0x(..)$/ ) {
            $from = " 0x$1";
        }

        if ( $fromto{$from} eq "" ) {
            push(@fromlist, $from);
            $fromto{$from}      = $to;
            $commentbody{$from} = $comment;
            $commentseq{$from}  = $seq;
        } elsif ( $fromto{$from} ne $to ) {
            # another mapping SJIS:UCS2 = 1:N
            print "Another map in $infile\t$from\t$fromto{$from},$to\n";
        }

        # Check-only tables do not contribute to the reverse map.
        if ($checkanother == 1) {
            next;
        }

        if ( $tofrom{$to} eq "" ) {
            $tofrom{$to} = $from;
        } else {
            # Append $from only if it is not already one of the listed codes.
            # (Matching $from against the whole comma-separated list as a
            # regex, as was done before, misses it once the list has two or
            # more entries and appends duplicates.)
            my $already_listed = grep { $_ eq $from } split(/,/, $tofrom{$to});
            if ( !$already_listed ) {
                $tofrom{$to} = "$tofrom{$to},$from";
            }
        }
    }

    close($in_fh);

    # Everything after the first table is check-only.  This used to be the
    # no-op comparison "$checkanother == 1;", so the flag was never set.
    $checkanother = 1;
}
|
108 |
|
# Dump one line per SJIS code (sorted as strings) to the comment file:
# code, Unicode value plus any "+0x..." sequence suffix, and comment text.
open(my $comment_fh, '>', $commentfile)
    || die "mkjpconv.pl: cannot create $commentfile: $!\n";
foreach my $code (sort(@fromlist)) {
    print {$comment_fh} "$code\t$fromto{$code}$commentseq{$code}\t$commentbody{$code}\n";
}
# Buffered write errors only surface at close time, so report them.
close($comment_fh)
    || warn "mkjpconv.pl: error closing $commentfile: $!\n";
|
114 |
|
115 |
|
# Open every output map for writing.  The handles stay bareword because the
# generation code further down prints to them by name.  Three-argument open
# keeps characters in the (argument-derived) file names from being read as
# part of the open mode, and each failure now reports the file and reason.
open(SU,     '>', $sufile)     || die "mkjpconv.pl: cannot create $sufile: $!\n";
open(US,     '>', $usfile)     || die "mkjpconv.pl: cannot create $usfile: $!\n";
open(JU,     '>', $jufile)     || die "mkjpconv.pl: cannot create $jufile: $!\n";
open(JEU,    '>', $jeufile)    || die "mkjpconv.pl: cannot create $jeufile: $!\n";
open(JAU,    '>', $jaufile)    || die "mkjpconv.pl: cannot create $jaufile: $!\n";
open(JRKU,   '>', $jrkufile)   || die "mkjpconv.pl: cannot create $jrkufile: $!\n";
open(UJ,     '>', $ujfile)     || die "mkjpconv.pl: cannot create $ujfile: $!\n";
open(UJE,    '>', $ujefile)    || die "mkjpconv.pl: cannot create $ujefile: $!\n";
open(UJA,    '>', $ujafile)    || die "mkjpconv.pl: cannot create $ujafile: $!\n";
open(UJRK,   '>', $ujrkfile)   || die "mkjpconv.pl: cannot create $ujrkfile: $!\n";
open(IBMNEC, '>', $ibmnecfile) || die "mkjpconv.pl: cannot create $ibmnecfile: $!\n";
|
127 |
|
# Report header on standard output.
print "Generated from $src\n",
      "Command: mkjpconv.pl @ARGV\n",
      "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";

# Walk all SJIS codes (sorted as strings).  For each one: print a report
# line, write the SJIS->UCS map entry, queue the reverse entry, and file the
# code under JIS X 0208, the CP932 extension, kana or ASCII.
foreach my $entry (sort(@fromlist)) {

    # Key without its first run of whitespace.
    (my $sjis = $entry) =~ s/\s+//;

    my $jis      = sjistojis($sjis);
    my $ucs      = $fromto{$entry};
    my $backlist = $tofrom{$ucs};       # SJIS code(s) this Unicode value maps from

    my $report = "$entry($jis)\t$ucs\t$backlist";

    if ( $entry eq $backlist ) {
        # Round trip is unique.
        $report .= "\t1:1:1" . "\t$entry";
    }
    else {
        # Several SJIS codes share this Unicode value; the first listed one
        # is reported, and remembered for codes in the 0xFA..-0xFD.. range.
        my @candidates = split(/,/, $backlist);
        $report .= "\t1:1:N" . "\t$candidates[0]";
        if ( $sjis =~ /0xF[A-D]../ ) {
            $ibmnec{$sjis} = $candidates[0];
        }
    }

    print SU "$sjis\t$ucs\n";
    push(@uslist, "$ucs\t$sjis\n");

    if ( $jis eq "" ) {
        # No double-byte JIS equivalent: single-byte kana or ASCII range.
        if ( $sjis =~ /\s*0x([8-9A-D].)/ ) {
            my $kana = "0x00$1";
            print JRKU "$kana\t$ucs\n";
            push(@ujrklist, "$ucs\t$kana\n");
        }
        elsif ( $sjis =~ /\s*0x([0-7].)/ ) {
            my $ascii = "0x00$1";
            print JAU "$ascii\t$ucs\n";
            push(@ujalist, "$ucs\t$ascii\n");
        }
    }
    elsif ( $sjis =~ /0x..../ && $defined{$sjis} != 1 ) {
        # Not listed in SHIFTJIS.TXT: treated as an extension code.
        print JEU "$jis\t$ucs\n";
        push(@ujelist, "$ucs\t$jis\n");
        $jisextucs{$jis} = $ucs;
    }
    else {
        print JU "$jis\t$ucs\n";
        push(@ujlist, "$ucs\t$jis\n");
        $jisucs{$jis} = $ucs;
    }

    print "$report\n";
}

# The Unicode-keyed maps are written sorted by their text lines.
print US   sort @uslist;
print UJ   sort @ujlist;
print UJE  sort @ujelist;
print UJA  sort @ujalist;
print UJRK sort @ujrklist;
|
196 |
|
# make ibmnec mapping
#
# Emit one table entry for every code 0xFA40-0xFAFF, 0xFB00-0xFBFF and
# 0xFC00-0xFCFF: the code recorded in %ibmnec for it, or 0 when there is none.

print IBMNEC $NPL;
print IBMNEC "/* generated by $ID */\n";
print IBMNEC "/* IBM ext codes to NEC sel (in CP932) */\n\n";

foreach my $lead (0xFA, 0xFB, 0xFC) {
    for (my $trail = ( ($lead == 0xFA) ? 0x40 : 0x00 ); $trail <= 0xFF; $trail++) {
        my $ibm = sprintf("0x%02X%02X", $lead, $trail);
        my $raw = substr($ibm, 2, 6);       # the hex digits without "0x"
        my $nec = $ibmnec{$ibm};

        # String test on purpose: the stored values are "0x...." strings,
        # which all numify to 0, so the former numeric test ("" == ...) was
        # true for every entry and the whole table came out as UNDEF.
        if ( !defined($nec) || $nec eq "" ) {
            print IBMNEC "/* $raw:UNDEF */ 0, \n";
        } else {
            print IBMNEC "/* $raw */ $nec, \n";
        }
    }
}

close(IBMNEC);
|
216 |
|
# make jdx
#
# Write the 94 x 94 JIS-to-Unicode table to $jdxfile, eight entries per
# line.  Each cell is looked up in %jisucs first, then in %jisextucs; cells
# found in neither are written as 0xFFFD.

open(my $jdx_fh, '>', $jdxfile)
    || die "mkjpconv.pl: cannot create $jdxfile: $!\n";

print {$jdx_fh} $NPL;
print {$jdx_fh} "/* generated by $ID */\n";
print {$jdx_fh} "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";

for (my $row = 0; $row < 94; $row++) {
    printf {$jdx_fh} "/* 0x%2XXX */\n", ($row + 0x21);
    printf {$jdx_fh} " ";

    # Declared outside the loop: the trailer after the loop still uses it.
    my $col;
    for ($col = 0; $col < 94; $col++) {
        my $jis = sprintf("0x%02X%02X", ($row + 0x21), $col + 0x21);

        # String tests on purpose: the values are "0x...." strings, which
        # numify to 0, so the former numeric tests ("" == $ucs) discarded
        # every mapping and filled the table with 0xFFFD.
        my $ucs = $jisucs{$jis};
        if ( !defined($ucs) || $ucs eq "" ) {
            # try CP932 ext / jis ext
            $ucs = $jisextucs{$jis};
        }
        if ( !defined($ucs) || $ucs eq "" ) {
            # undefined
            print {$jdx_fh} "0xFFFD,";
        } else {
            print {$jdx_fh} "$ucs,";
        }

        # End of a group of eight: close the line with a position comment.
        if (7 == ( ($col + 1) % 8 )) {
            printf {$jdx_fh} "/* 0x%2X%1X%1X*/\n",
                $row + 0x21, 2 + ($col / 16), (6 == ($col % 16)) ? 0 : 8;
        }
    }
    # Trailer for the last, partial group of the row ($col is 94 here).
    printf {$jdx_fh} " /* 0x%2X%1X%1X*/\n",
        $row + 0x21, 2 + ($col / 16), (6 == ($col % 16)) ? 0 : 8;
}

# Buffered write errors only surface at close time, so report them.
close($jdx_fh)
    || warn "mkjpconv.pl: error closing $jdxfile: $!\n";
|
251 |
|
252 |
|
# Close the map files.  They are handed to umaptable right afterwards, and
# buffered write errors only surface at close time, so a failed close is
# reported (as a warning; generation continues as before).
foreach my $out (
    [ \*SU,   $sufile   ],
    [ \*US,   $usfile   ],
    [ \*JU,   $jufile   ],
    [ \*JEU,  $jeufile  ],
    [ \*JAU,  $jaufile  ],
    [ \*JRKU, $jrkufile ],
    [ \*UJ,   $ujfile   ],
    [ \*UJE,  $ujefile  ],
    [ \*UJA,  $ujafile  ],
    [ \*UJRK, $ujrkfile ],
) {
    my ($handle, $name) = @$out;
    close($handle)
        || warn "mkjpconv.pl: error closing $name: $!\n";
}
|
263 |
|
264 # generate uf files |
|
265 |
|
# genuf($infile, $outfile)
#
# Runs "cat $infile | ./umaptable -uf" through the shell and redirects the
# output to $gendir/$outfile.  $infile is pasted into the command line
# unquoted, so it may hold several space-separated file names.
#
# Returns the status value from system().  A non-zero status is reported
# with a warning instead of being ignored; the script keeps going.
sub genuf {
    my ($infile, $outfile) = @_;
    my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
    print "Executing $com\n";
    my $status = system($com);
    if ($status != 0) {
        warn "mkjpconv.pl: command failed (status $status): $com\n";
    }
    return $status;
}
|
272 |
|
genuf($sufile, "sjis.uf");
genuf($jufile, "jis0208.uf");

# Build the extension table whenever at least one extension entry was
# collected.  The former test "$#ujelist > 0" needed two or more entries,
# so a single extension mapping was silently left out.
if ( @ujelist ) {
    genuf($jeufile, "jis0208ext.uf");
} else {
    print "Extension is not found. jis0208ext.uf is not generated.\n";
}

# ASCII and kana maps are fed to umaptable together as one jis0201 table.
genuf("$jaufile $jrkufile", "jis0201.uf");
# genuf($jaufile, "jis0201.uf");
# genuf($jrkufile, "jis0201gl.uf");


# generate test page


exit;
|
289 |
|
# sjistojis($sjis)
#
# Converts a double-byte Shift_JIS code, given as the string "0xHHLL", to
# the "0xHHLL" string of the matching cell in a 94 x 94 row/cell grid whose
# bytes start at 0x21.
#
# Returns "" when the argument is not "0x" followed by exactly four hex
# digits, or when the code falls beyond the 94th row of the grid.
sub sjistojis {
    my ($sjis) = @_;

    if ( $sjis !~ /^0x[0-9A-Fa-f]{4}$/ ) {
        return "";
    }

    my $first  = hex(substr($sjis, 2, 2));
    my $second = hex(substr($sjis, 4, 2));

    # Number of trail-byte positions counted per lead byte:
    # 0x40-0x7E (63) plus 0x80-0xFC (125).
    my $per_lead = (0xfd - 0x80) + (0x7f - 0x40);

    # Lead bytes below 0xE0 count from 0x81; those from 0xE0 up continue
    # right after the block that ends at 0x9F.
    my $lead_index = ($first < 0xE0)
        ? $first - 0x81
        : $first - 0xe0 + (0xa0 - 0x81);

    # Trail bytes from 0x80 up continue right after the 0x40-0x7E block.
    my $trail_index = ($second >= 0x80)
        ? $second - 0x80 + (0x7f - 0x40)
        : $second - 0x40;

    # Linear position of the code; kept lexical so it no longer leaks into
    # the package as a global.
    my $jnum = $lead_index * $per_lead + $trail_index;

    my $row  = int($jnum / 94);
    my $cell = $jnum % 94;

    if ($row < 94) {
        return sprintf "0x%02X%02X", $row + 0x21, $cell + 0x21;
    }
    return "";
}
|
323 |