intl/uconv/tools/mkjpconv.pl

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rwxr-xr-x

Conditionally enable double-key logic depending on private browsing
mode or the privacy.thirdparty.isolate preference, and implement it
in GetCookieStringCommon and FindCookie, where it counts.
Open reservation: how to convince FindCookie callers to test the
condition and pass a nullptr when double-key logic is disabled.

     1 #!/usr/bin/perl
     2 $ID = "mkjpconv.pl @ARGV (Time-stamp: <2001-08-08 18:54:54 shom>)";
     4 # This Source Code Form is subject to the terms of the Mozilla Public
     5 # License, v. 2.0. If a copy of the MPL was not distributed with this
     6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     8 #
     9 # based on CP932.TXT from unicode.org
    10 # additional information from SHIFTJIS.TXT from unicode.org
    11 #
    12 # mapping policy:
    13 #   jis0208 to unicode : based on CP932
    14 #   unicode to jis0208 : based on CP932
    15 #                        the lowest code is used for dual mapping to jis0208
    16 #   ascii region       : based on ISO8859-1 ( same as CP932 ) IGNORE?
    17 #   kana region        : based on CP932
    18 #   IBM Ext(0xFxxx>)   : premap to NEC region ( mappable to JIS )
    20 if ($ARGV[0] eq "") {
    21     print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]\n";
    22     exit 1;
    23 }
    25 open (SI, "SHIFTJIS.TXT") || die;
    26 while(<SI>) {
    27     ($hi,$lo) = /^0x(..)?(..)\s/;
    28     if ($lo eq "") { next; }
    29     if ($hi eq "") { $hi="  " }
    30     $defined{"0x$hi$lo"} = 1;
    31 }
    32 close (SI);
    34 shift(@ARGV);
    36 $src = $ARGV[0];
    38 $gendir = "$src.d";
    39 mkdir("$src.d");
    41 $sufile = "sjis2ucs-$src.map";
    42 $usfile = "ucs2sjis-$src.map";
    43 $jufile = "jis2ucs-$src.map";
    44 $jeufile = "jisext2ucs-$src.map";
    45 $jaufile = "jisasc2ucs-$src.map";
    46 $jrkufile = "jiskana2ucs-$src.map";
    47 $ujfile = "ucs2jis-$src.map";
    48 $ujefile = "ucs2jisext-$src.map";
    49 $ujafile = "ucs2jisasc-$src.map";
    50 $ujrkfile = "ucs2jiskana-$src.map";
    51 $ibmnecfile = "$gendir/IBMNEC.map";
    52 $jdxfile = "$gendir/jis0208.ump";
    53 $jdxextfile = "jis0208ext.ump";
    54 $commentfile = "comment-$src.txt";
    56 open (IN, "NPL.header") || die;
    57 while(<IN>) {
    58     $NPL .= $_;
    59 }
    60 close (IN);
    62 foreach $infile ( @ARGV ) {
    64     open (IN, "$infile") || die;
    66     while(<IN>) {
    67 	($from, $to, $seq, $dum, $comment) =
    68 	    /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;
    69 	if ( $seq ne "" ) {
    70 	    print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
    71 	}
    73 	if ( $from eq "" ) { next; }
    75 	if ( $from =~ /0x(..)$/ ) {
    76 	    $from = "  0x$1";
    77 	}
    79 	if ( $fromto{$from} eq "" ) {
    80 	    push(@fromlist, $from);
    81 	    $fromto{$from} = $to;
    82 	    $commentbody{$from} = $comment;
    83 	    $commentseq{$from} = $seq
    84 	} elsif ( $fromto{$from} ne $to ) {
    85 	    # another mappint SJIS:UCS2 = 1:N
    86 	    print "Another map in $infile\t$from\t$fromto{$from},$to\n";
    87 	}
    89 	if ($checkanother==1) {
    90 	    next;
    91 	}
    93 	if ( $tofrom{$to} eq "" ) {
    94 	    $tofrom{$to} = $from;
    95 	} else {
    96 	    if ( $from !~ /$tofrom{$to}/ ){
    97 	    $tofrom{$to} = "$tofrom{$to},$from";
    98 	}
    99 	}
   101 	# print "$from $to\n";
   102     }
   104     close (IN);
   106     $checkanother == 1;
   107 }
   109 open (COMMENT, ">$commentfile") || die;
   110 foreach $from (sort(@fromlist)) {
   111     print COMMENT "$from\t$fromto{$from}$commentseq{$from}\t$commentbody{$from}\n";
   112 }
   113 close (COMMENT);
   116 open(SU, ">$sufile") || die;
   117 open(US, ">$usfile") || die;
   118 open(JU, ">$jufile") || die;
   119 open(JEU, ">$jeufile") || die;
   120 open(JAU, ">$jaufile") || die;
   121 open(JRKU, ">$jrkufile") || die;
   122 open(UJ, ">$ujfile") || die;
   123 open(UJE, ">$ujefile") || die;
   124 open(UJA, ">$ujafile") || die;
   125 open(UJRK, ">$ujrkfile") || die;
   126 open(IBMNEC, ">$ibmnecfile") || die;
   128 # print SU "/* generated from $src : SJIS UCS2 */\n";
   129 # print US "/* generated from $src : UCS2 SJIS */\n";
   130 print "Generated from $src\n";
   131 print "Command: mkjpconv.pl @ARGV\n";
   132 print "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";
   134 foreach $i (sort(@fromlist)) {
   136     $ucs = "";
   138     $sjis = $i;
   139     $sjis =~ s/\s+//;
   140     $jis = sjistojis($sjis);
   142     print "$i($jis)\t$fromto{$i}\t$tofrom{$fromto{$i}}";
   143     $ucs = $fromto{$i};
   145     if ( $i eq $tofrom{$fromto{$i}} ) {
   146 	print "\t1:1:1";
   147 	print "\t$i";
   148     } else {
   149 	print "\t1:1:N";
   150 	@tolist = split(/,/,$tofrom{$fromto{$i}});
   151 	print "\t$tolist[0]";
   152 	#$ucs = $tolist[0];
   153 	if ( $sjis =~ /0xF[A-D]../ ) {
   154 	    $ibmnec{$sjis} = $tolist[0];
   155 	    #print IBMNEC "$sjis\t$tolist[0]\n";
   156 	}
   158     }
   159     print SU "$sjis\t$ucs\n";
   160     push(@uslist, "$ucs\t$sjis\n");
   162     #print US "$ucs\t$sjis\n";
   163     if ( $jis ne "") {
   164 	#if ($sjis =~ /^0x87../ || $sjis =~ /^0xED../ ) {
   165 	    # cp932 ext
   166 	if ($sjis =~ /0x..../ && $defined{$sjis} != 1) {
   167 	    # jis not define
   168 	    print JEU "$jis\t$ucs\n";
   169 	    push(@ujelist, "$ucs\t$jis\n");
   170 	    $jisextucs{$jis} = $ucs;
   171 	} else {
   172 	    print JU "$jis\t$ucs\n";
   173 	    push(@ujlist, "$ucs\t$jis\n");
   174 	    $jisucs{$jis} = $ucs;
   175 	}
   177 	#print UJ "$ucs\t$jis\n";
   178     } elsif ( $sjis =~ /\s*0x([8-9A-D].)/ ) {
   179 	$code = $1;
   180 	print JRKU "0x00$code\t$ucs\n";
   181 	push(@ujrklist, "$ucs\t0x00$code\n");
   182     } elsif ( $sjis =~ /\s*0x([0-7].)/ ) {
   183 	$code = $1;
   184 	print JAU "0x00$code\t$ucs\n";
   185 	push(@ujalist, "$ucs\t0x00$code\n");
   186     }
   187     #print "\t# $comment{$i}\n";
   188     print "\n";
   189 }
   191 print US sort(@uslist);
   192 print UJ sort(@ujlist);
   193 print UJE sort(@ujelist);
   194 print UJA sort(@ujalist);
   195 print UJRK sort(@ujrklist);
   197 # make ibmnec mapping
   199 print IBMNEC $NPL;
   200 print IBMNEC "/* generated by $ID */\n";
   201 print IBMNEC "/* IBM ext codes to NEC sel (in CP932) */\n\n";
   203 foreach $i (0xFA, 0xFB, 0xFC) {
   204     for ($j=( ($i==0xFA) ? 0x40 : 0x00 ); $j<=0xFF; $j++) {
   205 	$ibm = sprintf("0x%02X%02X", $i, $j);
   206 	$raw = substr($ibm, 2,6);
   207 	if ("" == $ibmnec{$ibm}) {
   208 	    print IBMNEC "/* $raw:UNDEF */ 0, \n";
   209 	} else {
   210 	    print IBMNEC "/* $raw */ $ibmnec{$ibm}, \n";
   211 	}
   212     }
   213 }
   215 close(IBMNEC);
   217 # make jdx
   219 open (JDX, ">$jdxfile") || die;
   221 print JDX $NPL;
   222 print JDX "/* generated by $ID */\n";
   223 print JDX "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";
   225 for ($i=0; $i<94; $i++) {
   226     printf JDX "/* 0x%2XXX */\n", ($i+0x21);
   227     printf JDX "       ";
   228     for ($j=0; $j<94; $j++) {
   229 	$jis = sprintf("0x%02X%02X", ($i+0x21), $j+0x21);
   230 	# get JIS
   231 	$ucs = $jisucs{$jis};
   232 	if ("" == $ucs) {
   233 	    # try CP932 ext
   234 	    # try jis ext
   235 	    $ucs = $jisextucs{$jis}
   236 	}
   237 	if ("" == $ucs) {
   238 	    # undefined
   239 	    print JDX "0xFFFD,";
   240 	} else {
   241 	    print JDX "$ucs,";
   242 	}
   243 	if (7 == ( ($j+1) % 8 )) {
   244 	    printf JDX "/* 0x%2X%1X%1X*/\n", $i+0x21, 2+($j/16), (6==($j%16))?0:8;
   245 	}
   246     }
   247     printf JDX "       /* 0x%2X%1X%1X*/\n", $i+0x21, 2+($j/16), (6==($j%16))?0:8;
   248 }
   250 close (JDX);
   253 close(SU);
   254 close(US);
   255 close(JU);
   256 close(JEU);
   257 close(JAU);
   258 close(JRKU);
   259 close(UJ);
   260 close(UJE);
   261 close(UJA);
   262 close(UJRK);
   264 # generate uf files
   266 sub genuf {
   267     my ($infile, $outfile) = @_;
   268     my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
   269     print "Executing $com\n";
   270     system($com);
   271 }
   273 genuf($sufile, "sjis.uf");
   274 genuf($jufile, "jis0208.uf");
   275 if ( $#ujelist > 0 ) {
   276     genuf($jeufile, "jis0208ext.uf");
   277 } else {
   278     print "Extension is not found. jis0208ext.uf is not generated.\n";
   279 }
   280 genuf("$jaufile $jrkufile", "jis0201.uf");
   281 # genuf($jaufile, "jis0201.uf");
   282 # genuf($jrkufile, "jis0201gl.uf");
   285 # generate test page
   288 exit;
   290 sub sjistojis {
   291    my($sjis) = (@_);
   292    my($first,$second,$h, $l, $j0208);
   294    if ( $sjis !~ /^0x....$/ ) {
   295        return "";
   296    }
   298    $first = hex(substr($sjis,2,2));
   299    $second = hex(substr($sjis,4,2));
   300    $jnum=0;
   302    if($first < 0xE0)
   303    {
   304        $jnum = ($first - 0x81) * ((0xfd - 0x80)+(0x7f - 0x40));
   305    } else {
   306        $jnum = ($first - 0xe0 + (0xa0-0x81)) * ((0xfd - 0x80)+(0x7f - 0x40));
   307    }
   308    if($second >= 0x80)
   309    {
   310        $jnum += $second - 0x80 + (0x7f-0x40);
   311    }
   312    else
   313    {
   314        $jnum += $second - 0x40;
   315    }
   316    if(($jnum / 94 ) < 94) {
   317        return sprintf "0x%02X%02X", (($jnum / 94) + 0x21), (($jnum % 94)+0x21);
   318    } else {
   319        #return sprintf "# 0x%02X%02X", (($jnum / 94) + 0x21), (($jnum % 94)+0x21);
   320        return "";
   321    }
   322 }

mercurial