#!/usr/bin/perl

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#
# based on CP932.TXT from unicode.org
# additional information from SHIFTJIS.TXT from unicode.org
#
# mapping policy:
#   jis0208 to unicode : based on CP932
#   unicode to jis0208 : based on CP932
#                        the lowest code is used for dual mapping to jis0208
#   ascii region       : based on ISO8859-1 ( same as CP932 ) IGNORE?
#   kana region        : based on CP932
#   IBM Ext(0xFxxx>)   : premap to NEC region ( mappable to JIS )
#
# Overview of what this script does:
#   1. Reads "SHIFTJIS.TXT" from the current directory and records which
#      codes it defines.
#   2. Drops the first command-line argument; the next one ($src) names the
#      outputs, and every remaining argument is read as a mapping table.
#      Only the first table contributes reverse (UCS -> SJIS) mappings; the
#      later ones are only checked for conflicting forward mappings.
#   3. Writes the *.map files, "$src.d/IBMNEC.map" and "$src.d/jis0208.ump",
#      then runs ./umaptable to produce the *.uf files in "$src.d".

use strict;
use warnings;

my $ID = "mkjpconv.pl @ARGV (Time-stamp: <2001-08-08 18:54:54 shom>)";

if (!defined $ARGV[0] || $ARGV[0] eq "") {
    print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT [Another check]\n";
    exit 1;
}

# --- collect the codes defined by SHIFTJIS.TXT -----------------------------

# Keys are "0x" followed by the high and low byte text; a one-byte code gets
# a blank in place of the high byte.  Values are always 1.
my %jis_defined;

# NOTE(review): the file name is hard-coded; the first argument is only
# checked for presence and then discarded below.
open(my $si, '<', "SHIFTJIS.TXT")
    or die "mkjpconv.pl: cannot open SHIFTJIS.TXT: $!\n";
while (my $line = <$si>) {
    my ($hi, $lo) = $line =~ /^0x(..)?(..)\s/;
    next if !defined $lo || $lo eq "";
    $hi = " " if !defined $hi || $hi eq "";
    $jis_defined{"0x$hi$lo"} = 1;
}
close($si);

shift(@ARGV);

# With a single argument there is no source name; an empty string then
# takes its place in the derived file names.
my $src = defined $ARGV[0] ? $ARGV[0] : "";

my $gendir = "$src.d";
mkdir("$src.d");    # best effort: the directory may already exist

my $sufile      = "sjis2ucs-$src.map";
my $usfile      = "ucs2sjis-$src.map";
my $jufile      = "jis2ucs-$src.map";
my $jeufile     = "jisext2ucs-$src.map";
my $jaufile     = "jisasc2ucs-$src.map";
my $jrkufile    = "jiskana2ucs-$src.map";
my $ujfile      = "ucs2jis-$src.map";
my $ujefile     = "ucs2jisext-$src.map";
my $ujafile     = "ucs2jisasc-$src.map";
my $ujrkfile    = "ucs2jiskana-$src.map";
my $ibmnecfile  = "$gendir/IBMNEC.map";
my $jdxfile     = "$gendir/jis0208.ump";
my $jdxextfile  = "jis0208ext.ump";        # not used anywhere in this script
my $commentfile = "comment-$src.txt";

# --- license header that is copied into the generated tables ---------------

my $NPL = "";
open(my $npl_in, '<', "NPL.header")
    or die "mkjpconv.pl: cannot open NPL.header: $!\n";
while (my $line = <$npl_in>) {
    $NPL .= $line;
}
close($npl_in);

# --- read the mapping tables ------------------------------------------------

my @fromlist;        # source codes in order of first appearance
my %fromto;          # source code -> UCS code (first mapping seen wins)
my %tofrom;          # UCS code -> comma-separated list of source codes
my %commentbody;     # source code -> trailing comment text of its line
my %commentseq;      # source code -> "+0x..." sequence suffix, if any
my $checkanother = 0;

foreach my $infile (@ARGV) {

    open(my $in, '<', $infile)
        or die "mkjpconv.pl: cannot open $infile: $!\n";

    while (my $line = <$in>) {
        my ($from, $to, $seq, $dum, $comment) = $line =~
            /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;

        # Lines that are not mapping entries do not match at all.
        next if !defined $from || $from eq "";

        $seq     = "" if !defined $seq;
        $comment = "" if !defined $comment;

        if ($seq ne "") {
            print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
        }

        # Two-digit (single byte) codes get a leading blank.
        if ($from =~ /0x(..)$/) {
            $from = " 0x$1";
        }

        if (!defined $fromto{$from}) {
            push(@fromlist, $from);
            $fromto{$from}      = $to;
            $commentbody{$from} = $comment;
            $commentseq{$from}  = $seq;
        }
        elsif ($fromto{$from} ne $to) {
            # another mapping SJIS:UCS2 = 1:N
            print "Another map in $infile\t$from\t$fromto{$from},$to\n";
        }

        # Tables after the first one are only checked for conflicts.
        next if $checkanother == 1;

        if (!defined $tofrom{$to}) {
            $tofrom{$to} = $from;
        }
        elsif (!grep { $_ eq $from } split(/,/, $tofrom{$to})) {
            # Append only codes that are not in the list yet.
            $tofrom{$to} = "$tofrom{$to},$from";
        }

        # print "$from $to\n";
    }

    close($in);

    # This must be an assignment: a comparison here has no effect and would
    # leave every table treated as the primary one.
    $checkanother = 1;
}

# --- dump the comments ------------------------------------------------------

my $comment_out = open_for_write($commentfile);
foreach my $from (sort(@fromlist)) {
    print {$comment_out}
        "$from\t$fromto{$from}$commentseq{$from}\t$commentbody{$from}\n";
}
close($comment_out);

# --- write the map files ----------------------------------------------------

my $su_out     = open_for_write($sufile);
my $us_out     = open_for_write($usfile);
my $ju_out     = open_for_write($jufile);
my $jeu_out    = open_for_write($jeufile);
my $jau_out    = open_for_write($jaufile);
my $jrku_out   = open_for_write($jrkufile);
my $uj_out     = open_for_write($ujfile);
my $uje_out    = open_for_write($ujefile);
my $uja_out    = open_for_write($ujafile);
my $ujrk_out   = open_for_write($ujrkfile);
my $ibmnec_out = open_for_write($ibmnecfile);

# print SU "/* generated from $src : SJIS UCS2 */\n";
# print US "/* generated from $src : UCS2 SJIS */\n";
print "Generated from $src\n";
print "Command: mkjpconv.pl @ARGV\n";
print "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";

my (@uslist, @ujlist, @ujelist, @ujalist, @ujrklist);
my (%ibmnec, %jisucs, %jisextucs);

foreach my $key (sort(@fromlist)) {

    my $sjis = $key;
    $sjis =~ s/\s+//;
    my $jis = sjistojis($sjis);

    my $ucs = $fromto{$key};

    # Codes that only occur in a conflict-check table have no reverse entry.
    my $back = defined $tofrom{$ucs} ? $tofrom{$ucs} : "";

    print "$key($jis)\t$ucs\t$back";

    if ($key eq $back) {
        print "\t1:1:1";
        print "\t$key";
    }
    else {
        print "\t1:1:N";
        my @tolist = split(/,/, $back);
        my $lowest = defined $tolist[0] ? $tolist[0] : "";
        print "\t$lowest";
        if ($sjis =~ /0xF[A-D]../) {
            $ibmnec{$sjis} = $lowest;
        }
    }

    print {$su_out} "$sjis\t$ucs\n";
    push(@uslist, "$ucs\t$sjis\n");

    if ($jis ne "") {
        if ($sjis =~ /0x..../ && !$jis_defined{$sjis}) {
            # not defined by SHIFTJIS.TXT: goes to the extension table
            print {$jeu_out} "$jis\t$ucs\n";
            push(@ujelist, "$ucs\t$jis\n");
            $jisextucs{$jis} = $ucs;
        }
        else {
            print {$ju_out} "$jis\t$ucs\n";
            push(@ujlist, "$ucs\t$jis\n");
            $jisucs{$jis} = $ucs;
        }
    }
    elsif ($sjis =~ /\s*0x([8-9A-D].)/) {
        my $code = $1;
        print {$jrku_out} "0x00$code\t$ucs\n";
        push(@ujrklist, "$ucs\t0x00$code\n");
    }
    elsif ($sjis =~ /\s*0x([0-7].)/) {
        my $code = $1;
        print {$jau_out} "0x00$code\t$ucs\n";
        push(@ujalist, "$ucs\t0x00$code\n");
    }
    print "\n";
}

print {$us_out}   sort(@uslist);
print {$uj_out}   sort(@ujlist);
print {$uje_out}  sort(@ujelist);
print {$uja_out}  sort(@ujalist);
print {$ujrk_out} sort(@ujrklist);

# --- make ibmnec mapping ----------------------------------------------------

print {$ibmnec_out} $NPL;
print {$ibmnec_out} "/* generated by $ID */\n";
print {$ibmnec_out} "/* IBM ext codes to NEC sel (in CP932) */\n\n";

foreach my $lead (0xFA, 0xFB, 0xFC) {
    for (my $trail = (($lead == 0xFA) ? 0x40 : 0x00); $trail <= 0xFF; $trail++) {
        my $ibm = sprintf("0x%02X%02X", $lead, $trail);
        my $raw = substr($ibm, 2, 6);
        # The values are "0x...." strings, so this has to be a string test;
        # compared numerically they all evaluate to 0.
        if (!defined $ibmnec{$ibm} || $ibmnec{$ibm} eq "") {
            print {$ibmnec_out} "/* $raw:UNDEF */ 0, \n";
        }
        else {
            print {$ibmnec_out} "/* $raw */ $ibmnec{$ibm}, \n";
        }
    }
}

close($ibmnec_out);

# --- make jdx ---------------------------------------------------------------

my $jdx_out = open_for_write($jdxfile);

print {$jdx_out} $NPL;
print {$jdx_out} "/* generated by $ID */\n";
print {$jdx_out} "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";

for (my $i = 0; $i < 94; $i++) {
    printf {$jdx_out} "/* 0x%2XXX */\n", ($i + 0x21);
    printf {$jdx_out} " ";
    my $j;
    for ($j = 0; $j < 94; $j++) {
        my $jis = sprintf("0x%02X%02X", ($i + 0x21), $j + 0x21);

        # Look in the plain table first, then in the extension table.
        my $ucs = $jisucs{$jis};
        if (!defined $ucs || $ucs eq "") {
            $ucs = $jisextucs{$jis};
        }

        if (!defined $ucs || $ucs eq "") {
            # undefined
            print {$jdx_out} "0xFFFD,";
        }
        else {
            print {$jdx_out} "$ucs,";
        }

        if (7 == (($j + 1) % 8)) {
            printf {$jdx_out} "/* 0x%2X%1X%1X*/\n",
                $i + 0x21, 2 + ($j / 16), (6 == ($j % 16)) ? 0 : 8;
        }
    }
    # $j is 94 here; the row trailer is formatted from that value.
    printf {$jdx_out} " /* 0x%2X%1X%1X*/\n",
        $i + 0x21, 2 + ($j / 16), (6 == ($j % 16)) ? 0 : 8;
}

close($jdx_out);

# The map files must be closed before umaptable reads them.
close($su_out);
close($us_out);
close($ju_out);
close($jeu_out);
close($jau_out);
close($jrku_out);
close($uj_out);
close($uje_out);
close($uja_out);
close($ujrk_out);

# --- generate uf files ------------------------------------------------------

genuf($sufile, "sjis.uf");
genuf($jufile, "jis0208.uf");
if (@ujelist) {
    genuf($jeufile, "jis0208ext.uf");
}
else {
    print "Extension is not found. jis0208ext.uf is not generated.\n";
}
genuf("$jaufile $jrkufile", "jis0201.uf");
# genuf($jaufile, "jis0201.uf");
# genuf($jrkufile, "jis0201gl.uf");


# generate test page


exit;

# Open $path for writing (truncating it) and return the file handle.
# Dies with the system error message if the file cannot be created.
sub open_for_write {
    my ($path) = @_;
    open(my $fh, '>', $path)
        or die "mkjpconv.pl: cannot create $path: $!\n";
    return $fh;
}

# Pipe one or more map files through ./umaptable to create a .uf file.
#   $infile  : input file name, or several names separated by blanks
#              (the string is handed to the shell unquoted on purpose so
#              that "cat" receives each name as its own argument)
#   $outfile : name of the file to create inside $gendir
# A failing command is reported on STDERR; the script keeps running.
sub genuf {
    my ($infile, $outfile) = @_;
    my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
    print "Executing $com\n";
    my $status = system($com);
    if ($status != 0) {
        warn "mkjpconv.pl: command failed (status $status): $com\n";
    }
    return;
}

# Convert a two-byte Shift_JIS code to the corresponding JIS code.
#   $sjis   : string of the form "0xHHHH"
#   returns : string of the form "0xHHHH", or "" when the argument does not
#             have that form or the computed row is 94 or greater
sub sjistojis {
    my ($sjis) = (@_);

    if ($sjis !~ /^0x....$/) {
        return "";
    }

    my $first  = hex(substr($sjis, 2, 2));
    my $second = hex(substr($sjis, 4, 2));
    my $jnum   = 0;

    # Each lead byte accounts for (0xfd - 0x80) + (0x7f - 0x40) positions;
    # lead bytes from 0xE0 up continue the count after the 0x81-0x9F run.
    if ($first < 0xE0) {
        $jnum = ($first - 0x81) * ((0xfd - 0x80) + (0x7f - 0x40));
    }
    else {
        $jnum = ($first - 0xe0 + (0xa0 - 0x81)) * ((0xfd - 0x80) + (0x7f - 0x40));
    }

    if ($second >= 0x80) {
        $jnum += $second - 0x80 + (0x7f - 0x40);
    }
    else {
        $jnum += $second - 0x40;
    }

    if (($jnum / 94) < 94) {
        # %X drops the fractional part of the quotient.
        return sprintf "0x%02X%02X", (($jnum / 94) + 0x21), (($jnum % 94) + 0x21);
    }
    else {
        #return sprintf "# 0x%02X%02X", (($jnum / 94) + 0x21), (($jnum % 94)+0x21);
        return "";
    }
}