Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
#!/usr/bin/perl
# Identification string embedded in the generated files; it records the
# command-line arguments as they were when the script started.
$ID = "mkjpconv.pl @ARGV (Time-stamp: <2001-08-08 18:54:54 shom>)";

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#
# based on CP932.TXT from unicode.org
# additional information from SHIFTJIS.TXT from unicode.org
#
# mapping policy:
# jis0208 to unicode : based on CP932
# unicode to jis0208 : based on CP932
# the lowest code is used for dual mapping to jis0208
# ascii region : based on ISO8859-1 ( same as CP932 ) IGNORE?
# kana region : based on CP932
# IBM Ext(0xFxxx>) : premap to NEC region ( mappable to JIS )

# Two arguments are needed: the first one is dropped later and the second
# one names the input table from which every output file name is derived.
# Stopping here avoids generating files with an empty source name.
if (@ARGV < 2 || $ARGV[0] eq "") {
    print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]\n";
    exit 1;
}
michael@0 | 24 | |
# Record every code listed in SHIFTJIS.TXT in %defined, keyed as "0x" followed
# by the high and low byte digits.  Single-byte entries get a blank in place
# of the missing high byte.  The table is always read from SHIFTJIS.TXT in the
# current directory; the first command-line argument is not consulted here.
open(my $sjis_table, '<', "SHIFTJIS.TXT")
    or die "mkjpconv.pl: cannot open SHIFTJIS.TXT: $!\n";
while (my $line = <$sjis_table>) {
    my ($hi, $lo) = $line =~ /^0x(..)?(..)\s/;

    # Lines that do not start with a code (comments, blanks) are skipped.
    next if !defined($lo) || $lo eq "";

    $hi = " " if !defined($hi) || $hi eq "";
    $defined{"0x$hi$lo"} = 1;
}
close($sjis_table);
michael@0 | 33 | |
# Drop the first argument; what remains in @ARGV are the mapping tables.
shift(@ARGV);

# The first remaining table names every generated file.
$src = $ARGV[0];

# Directory for the generated tables.  It may be left over from an earlier
# run, so only a failure to create a missing directory is fatal.
$gendir = "$src.d";
unless (-d $gendir) {
    mkdir($gendir)
        or die "mkjpconv.pl: cannot create directory $gendir: $!\n";
}

# Intermediate maps, written to the current directory.
$sufile = "sjis2ucs-$src.map";
$usfile = "ucs2sjis-$src.map";
$jufile = "jis2ucs-$src.map";
$jeufile = "jisext2ucs-$src.map";
$jaufile = "jisasc2ucs-$src.map";
$jrkufile = "jiskana2ucs-$src.map";
$ujfile = "ucs2jis-$src.map";
$ujefile = "ucs2jisext-$src.map";
$ujafile = "ucs2jisasc-$src.map";
$ujrkfile = "ucs2jiskana-$src.map";

# Final tables, written below $gendir.
$ibmnecfile = "$gendir/IBMNEC.map";
$jdxfile = "$gendir/jis0208.ump";
$jdxextfile = "jis0208ext.ump";

# Listing of every mapping together with its source comment.
$commentfile = "comment-$src.txt";
michael@0 | 55 | |
# Collect the contents of NPL.header in $NPL; it is written at the top of
# the generated tables further down.
unless (open(IN, "NPL.header")) {
    die;
}
$NPL .= $_ while <IN>;
close(IN);
michael@0 | 61 | |
# Read every mapping table named on the command line.
#
# Results:
#   %fromto       source code -> Unicode code (first mapping seen wins)
#   %tofrom       Unicode code -> comma-separated source codes, in input order
#   @fromlist     source codes in order of first appearance
#   %commentbody  trailing "# ..." comment of the defining line
#   %commentseq   "+0x..." sequence suffix of the defining line, if any
#
# Only the first table feeds %tofrom.  Later tables ("Another check" in the
# usage text) are compared against %fromto and differences are reported.
foreach $infile ( @ARGV ) {

    open(my $table, '<', $infile)
        or die "mkjpconv.pl: cannot open $infile: $!\n";

    while (my $line = <$table>) {
        my ($from, $to, $seq, $dum, $comment) =
            $line =~ /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;

        # Not a mapping line.
        next if !defined($from) || $from eq "";

        if ( defined($seq) && $seq ne "" ) {
            print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
        }

        # Single-byte codes are padded with a blank in front.
        if ( $from =~ /0x(..)$/ ) {
            $from = " 0x$1";
        }

        if ( !defined($fromto{$from}) || $fromto{$from} eq "" ) {
            push(@fromlist, $from);
            $fromto{$from} = $to;
            $commentbody{$from} = $comment;
            $commentseq{$from} = $seq;
        } elsif ( $fromto{$from} ne $to ) {
            # another mapping SJIS:UCS2 = 1:N
            print "Another map in $infile\t$from\t$fromto{$from},$to\n";
        }

        # Tables after the first one are only checked, never merged into
        # the reverse map.
        next if $checkanother;

        if ( !defined($tofrom{$to}) || $tofrom{$to} eq "" ) {
            $tofrom{$to} = $from;
        } elsif ( !grep { $_ eq $from } split(/,/, $tofrom{$to}) ) {
            # Append only codes that are not in the list yet.
            $tofrom{$to} = "$tofrom{$to},$from";
        }

        # print "$from $to\n";
    }

    close($table);

    # Assignment, not comparison: every following table is check-only.
    $checkanother = 1;
}
michael@0 | 108 | |
# Write one line per source code, in string-sorted order: the code, its
# Unicode mapping (with any sequence suffix) and the comment from the table.
open(COMMENT, ">$commentfile") || die;
print COMMENT "$_\t$fromto{$_}$commentseq{$_}\t$commentbody{$_}\n"
    foreach sort(@fromlist);
close(COMMENT);
michael@0 | 114 | |
michael@0 | 115 | |
# Open every output map.  A failure names the file and the system error
# instead of stopping with a bare "Died".
open(SU, ">$sufile") || die "mkjpconv.pl: cannot write $sufile: $!\n";
open(US, ">$usfile") || die "mkjpconv.pl: cannot write $usfile: $!\n";
open(JU, ">$jufile") || die "mkjpconv.pl: cannot write $jufile: $!\n";
open(JEU, ">$jeufile") || die "mkjpconv.pl: cannot write $jeufile: $!\n";
open(JAU, ">$jaufile") || die "mkjpconv.pl: cannot write $jaufile: $!\n";
open(JRKU, ">$jrkufile") || die "mkjpconv.pl: cannot write $jrkufile: $!\n";
open(UJ, ">$ujfile") || die "mkjpconv.pl: cannot write $ujfile: $!\n";
open(UJE, ">$ujefile") || die "mkjpconv.pl: cannot write $ujefile: $!\n";
open(UJA, ">$ujafile") || die "mkjpconv.pl: cannot write $ujafile: $!\n";
open(UJRK, ">$ujrkfile") || die "mkjpconv.pl: cannot write $ujrkfile: $!\n";
open(IBMNEC, ">$ibmnecfile") || die "mkjpconv.pl: cannot write $ibmnecfile: $!\n";

# print SU "/* generated from $src : SJIS UCS2 */\n";
# print US "/* generated from $src : UCS2 SJIS */\n";

# Header of the report that goes to standard output.
print "Generated from $src\n";
print "Command: mkjpconv.pl @ARGV\n";
print "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";
michael@0 | 133 | |
# Walk all source codes in string-sorted order.  For each one, print a report
# line, write the code -> Unicode maps and queue the Unicode -> code rows
# that are written out sorted afterwards.
foreach my $entry (sort(@fromlist)) {

    # The code without its padding blank, and its JIS form ("" if none).
    (my $sjis = $entry) =~ s/\s+//;
    my $jis = sjistojis($sjis);

    my $ucs = $fromto{$entry};
    my $back = $tofrom{$ucs};

    print "$entry($jis)\t$ucs\t$back";

    if ( $entry eq $back ) {
        # The Unicode code maps back to exactly this code.
        print "\t1:1:1", "\t$entry";
    } else {
        # Several codes share this Unicode code; the first one listed is
        # the one reported.
        my ($lowest) = split(/,/, $back);
        print "\t1:1:N", "\t$lowest";
        #$ucs = $lowest;
        # Codes in 0xFA..-0xFD.. are remembered for the IBMNEC table.
        $ibmnec{$sjis} = $lowest if $sjis =~ /0xF[A-D]../;
        #print IBMNEC "$sjis\t$lowest\n";
    }

    print SU "$sjis\t$ucs\n";
    push(@uslist, "$ucs\t$sjis\n");
    #print US "$ucs\t$sjis\n";

    if ( $jis eq "" ) {
        # No JIS form: the code goes to the kana or the ASCII map,
        # depending on its leading digit.
        if ( $sjis =~ /\s*0x([8-9A-D].)/ ) {
            my $cell = "0x00$1";
            print JRKU "$cell\t$ucs\n";
            push(@ujrklist, "$ucs\t$cell\n");
        } elsif ( $sjis =~ /\s*0x([0-7].)/ ) {
            my $cell = "0x00$1";
            print JAU "$cell\t$ucs\n";
            push(@ujalist, "$ucs\t$cell\n");
        }
    } elsif ( $sjis =~ /0x..../ && $defined{$sjis} != 1 ) {
        #if ($sjis =~ /^0x87../ || $sjis =~ /^0xED../ ) {
        # cp932 ext
        # jis not define
        print JEU "$jis\t$ucs\n";
        push(@ujelist, "$ucs\t$jis\n");
        $jisextucs{$jis} = $ucs;
    } else {
        print JU "$jis\t$ucs\n";
        push(@ujlist, "$ucs\t$jis\n");
        $jisucs{$jis} = $ucs;
        #print UJ "$ucs\t$jis\n";
    }

    #print "\t# $comment{$entry}\n";
    print "\n";
}
michael@0 | 190 | |
# Write each queued Unicode -> code list to its file, sorted as strings.
foreach my $pair ( [ \*US,   \@uslist ],
                   [ \*UJ,   \@ujlist ],
                   [ \*UJE,  \@ujelist ],
                   [ \*UJA,  \@ujalist ],
                   [ \*UJRK, \@ujrklist ] ) {
    my ($handle, $rows) = @$pair;
    print {$handle} sort(@$rows);
}
michael@0 | 196 | |
# make ibmnec mapping
#
# Emits one table entry per code from 0xFA40 through 0xFCFF: the value
# recorded in %ibmnec for that code, or 0 when nothing was recorded.

print IBMNEC $NPL;
print IBMNEC "/* generated by $ID */\n";
print IBMNEC "/* IBM ext codes to NEC sel (in CP932) */\n\n";

foreach my $lead (0xFA, 0xFB, 0xFC) {
    # The first lead byte starts at trail byte 0x40, the others at 0x00.
    for (my $trail = ($lead == 0xFA) ? 0x40 : 0x00; $trail <= 0xFF; $trail++) {
        my $ibm = sprintf("0x%02X%02X", $lead, $trail);
        my $raw = substr($ibm, 2, 6);
        my $nec = $ibmnec{$ibm};

        # Compare as a string: a value such as "0x8790" is numerically 0,
        # so a numeric test against "" would treat every entry as missing.
        if (!defined($nec) || $nec eq "") {
            print IBMNEC "/* $raw:UNDEF */ 0, \n";
        } else {
            print IBMNEC "/* $raw */ $nec, \n";
        }
    }
}

close(IBMNEC);
michael@0 | 216 | |
# make jdx
#
# Writes a 94 x 94 table of Unicode values indexed by JIS row and cell.  A
# cell takes its value from %jisucs, then from %jisextucs, and is 0xFFFD
# when neither has an entry.  Eight-cell groups are followed by a comment
# giving their position.

open (JDX, ">$jdxfile") || die "mkjpconv.pl: cannot write $jdxfile: $!\n";

print JDX $NPL;
print JDX "/* generated by $ID */\n";
print JDX "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";

for (my $row = 0; $row < 94; $row++) {
    printf JDX "/* 0x%2XXX */\n", ($row+0x21);
    printf JDX " ";

    # Declared outside the loop: the closing comment needs its final value.
    my $cell;
    for ($cell = 0; $cell < 94; $cell++) {
        my $jis = sprintf("0x%02X%02X", ($row+0x21), $cell+0x21);

        # get JIS
        my $ucs = $jisucs{$jis};

        # Values look like "0x3000" and are numerically 0, so the lookups
        # are tested as strings, not with ==.
        if (!defined($ucs) || $ucs eq "") {
            # try CP932 ext
            # try jis ext
            $ucs = $jisextucs{$jis};
        }
        if (!defined($ucs) || $ucs eq "") {
            # undefined
            print JDX "0xFFFD,";
        } else {
            print JDX "$ucs,";
        }
        if (7 == ( ($cell+1) % 8 )) {
            printf JDX "/* 0x%2X%1X%1X*/\n", $row+0x21, 2+($cell/16), (6==($cell%16))?0:8;
        }
    }
    printf JDX " /* 0x%2X%1X%1X*/\n", $row+0x21, 2+($cell/16), (6==($cell%16))?0:8;
}

close (JDX);
michael@0 | 251 | |
michael@0 | 252 | |
# Close all intermediate map files.
close($_) foreach ( \*SU, \*US, \*JU, \*JEU, \*JAU,
                    \*JRKU, \*UJ, \*UJE, \*UJA, \*UJRK );
michael@0 | 263 | |
# generate uf files

# genuf($infile, $outfile)
#   Runs "cat $infile | ./umaptable -uf" through the shell and stores the
#   output as $gendir/$outfile.  $infile is inserted into the command line
#   unquoted, so it may hold several file names separated by blanks.
#   A failing command is reported on STDERR but does not stop the script.
#   Returns the status reported by system() (0 on success).
sub genuf {
    my ($infile, $outfile) = @_;
    my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
    print "Executing $com\n";

    my $status = system($com);
    if ($status != 0) {
        warn "mkjpconv.pl: command failed (status $status): $com\n";
    }
    return $status;
}
michael@0 | 272 | |
# Turn the code -> Unicode maps into .uf tables.
genuf($sufile, "sjis.uf");
genuf($jufile, "jis0208.uf");

# @ujelist has one row per extension mapping.  The element count is tested
# so that a single row still produces the extension table.
if ( @ujelist > 0 ) {
    genuf($jeufile, "jis0208ext.uf");
} else {
    print "Extension is not found. jis0208ext.uf is not generated.\n";
}

# The ASCII and kana maps are combined into one table; both file names are
# handed to genuf in a single string.
genuf("$jaufile $jrkufile", "jis0201.uf");
# genuf($jaufile, "jis0201.uf");
# genuf($jrkufile, "jis0201gl.uf");


# generate test page


exit;
michael@0 | 289 | |
# sjistojis($sjis)
#   Converts a double-byte Shift_JIS code written as "0xHHLL" into the
#   matching JIS code in the same "0xHHLL" notation (upper-case digits).
#
#   Returns "" when
#     - $sjis is not "0x" followed by exactly four hex digits,
#     - the lead byte is below 0x81 or the trail byte is below 0x40, or
#     - the result would lie beyond the 94 JIS rows (lead bytes from 0xF0).
sub sjistojis {
    my ($sjis) = @_;

    # Anything but a four-digit hex code has no double-byte JIS form.
    return "" if $sjis !~ /^0x[0-9A-Fa-f]{4}$/;

    my $first  = hex(substr($sjis, 2, 2));
    my $second = hex(substr($sjis, 4, 2));

    # Each lead byte spans 63 + 125 = 188 trail positions, i.e. two JIS rows.
    my $per_lead = (0xfd - 0x80) + (0x7f - 0x40);

    # Lead bytes from 0xE0 continue directly after the 0x81-0x9F range.
    my $lead_index = ($first < 0xE0)
        ? $first - 0x81
        : $first - 0xe0 + (0xa0 - 0x81);

    # Trail bytes skip 0x7F, so the upper range is shifted down by one range
    # length.
    my $trail_index = ($second >= 0x80)
        ? $second - 0x80 + (0x7f - 0x40)
        : $second - 0x40;

    my $jnum = $lead_index * $per_lead + $trail_index;

    # Bytes below the valid ranges give a negative index: no JIS position.
    return "" if $lead_index < 0 || $trail_index < 0;

    my $row = int($jnum / 94);
    if ($row >= 94) {
        #return sprintf "# 0x%02X%02X", ($row + 0x21), (($jnum % 94)+0x21);
        return "";
    }
    return sprintf("0x%02X%02X", $row + 0x21, ($jnum % 94) + 0x21);
}
michael@0 | 323 |