intl/uconv/tools/jamap.pl

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 #!/usr/local/bin/perl
     2 use strict;
     4 my @source_files;
     6 my @sjis_h;
     7 $sjis_h[0] = -1;
     8 @sjis_h[0x81..0x9f] = map { 0x2100 + $_ * 0x200 } (0 .. 30);
     9 @sjis_h[0xe0..0xef] = map { 0x5F00 + $_ * 0x200 } (0 .. 15);
    10 @sjis_h[0xf0..0xf9] = (-2) x 10;
    11 my @sjis_l;
    12 @sjis_l[0x40..0x7e] = (0x21..0x5f);
    13 @sjis_l[0x80..0xfc] = (0x60..0x7e, 0x121..0x17e);
    15 sub sjis_to_jis {
    16   my ($s) = @_;
    17   my $j;
    18   my $type;
    20   my $h = $sjis_h[($s>>8)&0xff];
    22   if ( $h > 0 ) { # jis0208
    24     my $l = $sjis_l[$s&0xff];
    25     if ( $l == 0 ) {
    26       $j = $s;
    27       $type = 'sjis2undef';
    28     } else {
    29       $j = $h + $l;
    30       if ( $j >= 0x3000 && $j < 0x7500 ) { # jis0208 kanji
    31         $type = 'jis0208';
    32       } elsif ( $j < 0x2900 ) { # jis0208
    33         $type = 'jis0208';
    34       } else {
    35         $type = 'jis0208undef';
    36       }
    37     }
    39   } elsif ( $h == -1 ) { # single byte
    41     $j = $s;
    42     if ( $s <= 0x7f ) { # jis0201 roman
    43       $type = 'jis0201';
    44     } elsif ( $s >= 0xa1 && $s <= 0xdf ) { # jis0201 kana
    45       $type = 'jis0201';
    46     } else { # sjis single byte undefined
    47       $type = 'sjis1undef';
    48     }
    50   } elsif ( $h == -2 ) { # private use
    51     $j = $s;
    52     $type = 'private';
    54   } else { # sjis undefined
    55     $j = $s;
    56     $type = 'sjis2undef';
    57   }
    59   return ($j, $type);
    60 }
    63 sub read_sjis_map {
    64   my ($filename, $s_col, $u_col) = @_;
    65   my %map;
    66   open MAP, $filename or die $!;
    67   while (<MAP>) {
    68     my @cols = split /\s+/;
    69     my ($s, $u) = @cols[$s_col, $u_col];
    70     $s =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/ or next;
    72     $s = oct($s);
    73     $u = oct($u);
    75     my ($j, $type) = sjis_to_jis($s);
    76     push @{$map{$type}}, [$j, $s, $u];
    78   }
    79   close MAP or warn $!;
    80   push @source_files, $filename;
    81   return %map;
    82 }
    84 sub read_0212_map {
    85   my ($filename, $j_col, $u_col) = @_;
    86   my $map;
    87   open MAP, $filename or die $!;
    88   while (<MAP>) {
    89     my @cols = split /\s+/;
    90     my ($j, $u) = @cols[$j_col, $u_col];
    91     $j =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/ or next;
    93     $j = oct($j);
    94     $u = oct($u);
    95     $u = 0xff5e if $u == 0x007e;
    97     push @$map, [$j, 0, $u];
    98   }
    99   close MAP or warn $!;
   100   push @source_files, $filename;
   101   return $map;
   102 }
   105 my %printed;
   106 sub write_fromu_map {
   107   my ($filename, $code, @maps) = @_;
   108   open MAP, ">$filename" or die $!;
   109   foreach my $map (@maps) {
   110     foreach my $pair (@$map) {
   111       my ($j, $s, $u) = @$pair;
   112       if ( $code eq 'sjis' ) {
   113         $j = $s;
   114       }
   115       if ( defined($printed{$u}) ) {
   116         if ( $printed{$u} ne $j ) {
   117           printf "conflict 0x%04x to 0x%04x, 0x%04x\n", $u, $printed{$u}, $j;
   118         }
   119       } else {
   120         if ( $j < 0x100 ) {
   121           printf MAP "0x%02X\t0x%04X\n", $j, $u;
   122         } else {
   123           printf MAP "0x%04X\t0x%04X\n", $j, $u;
   124         }
   125         $printed{$u} = $j;
   126       }
   127     }
   128   }
   129   close MAP or warn $!;
   130 }
   132 my @table;
   133 my %table;
   134 my $table_next_count = 0;
   136 sub get_94table_index {
   137   my ($map_table) = @_;
   138   my $key = join ',', map {int($map_table->[$_])} (0 .. 93);
   139   my $table_index = $table{$key};
   140   if ( !defined($table_index) ) {
   141     $table_index = $table_next_count;
   142     $table_next_count += 94;
   143     $table[$table_index] = $map_table;
   144     $table{$key} = $table_index;
   145   }
   146   return $table_index;
   147 }
   149 sub get_188table_index {
   150   my ($map_table) = @_;
   151   my $map_table1 = [ @{$map_table}[0 .. 93] ];
   152   my $map_table2 = [ @{$map_table}[94 .. 187] ];
   153   my $key = join ',', map {int($map_table->[$_])} (0 .. 187);
   154   my $key1 = join ',', map {int($map_table1->[$_])} (0 .. 93);
   155   my $key2 = join ',', map {int($map_table2->[$_])} (0 .. 93);
   156   my $table_index = $table{$key};
   157   if ( !defined($table_index) ) {
   158     $table_index = $table_next_count;
   159     $table_next_count += 188;
   160     $table[$table_index] = $map_table1;
   161     $table[$table_index + 94] = $map_table2;
   162     $table{$key} = $table_index;
   163     $table{$key1} = $table_index unless defined($table{$key1});
   164     $table{$key2} = $table_index + 94 unless defined($table{$key2});
   165   }
   166   return $table_index;
   167 }
   169 get_188table_index([]);
   171 sub print_sjis_table_index {
   172   my @maps = @_;
   173   my %map_table;
   174   foreach my $map (@maps) {
   175     foreach my $pair (@$map) {
   176       my ($j, $s, $u) = @$pair;
   177       my $row = $s >> 8;
   178       my $cell = $s&0xff;
   179       if ( $cell >= 0x40 && $cell <= 0x7e ) {
   180         $cell -= 0x40;
   181       } elsif ( $cell >= 0x80 && $cell <= 0xfc ) {
   182         $cell -= 0x41;
   183       } else {
   184         next;
   185       }
   186       if ( defined($map_table{$row}->[$cell]) && $map_table{$row}->[$cell] != $u ) {
   187          print "conflict!\n";
   188       }
   189       $map_table{$row}->[$cell] = $u;
   190     }
   191   }
   193   for ( my $i = 0x80; $i < 0x100; $i++ ) {
   194     if ( ($i & 0x7) == 0 ) {
   195       print MAP "\n ";
   196     }
   197     if ( $i >= 0xa1 && $i <= 0xdf ) {
   198       printf MAP " 0x%04X,", $i + 0xfec0;
   199     } elsif ( $i >= 0xf0 && $i <= 0xf9 ) {
   200       printf MAP " 0x%04X,", 0xe000 + ($i - 0xf0) * 188;
   201     } elsif ( $i == 0x80 ) {
   202       print MAP " 0xFFFD,";
   203     } elsif ( $i == 0xa0 ) {
   204       print MAP " 0xF8F0,";
   205     } elsif ( $i >= 0xfd ) {
   206       printf MAP " 0x%04X,", $i + (0xf8f1 - 0xfd);
   207     } else {
   208       my $table_index = get_188table_index($map_table{$i});
   209       printf MAP " %6d,", $table_index;
   210     }
   211   }
   212 }
   214 sub print_jis_table_index {
   215   my @maps = @_;
   216   my %map_table;
   217   foreach my $map (@maps) {
   218     foreach my $pair (@$map) {
   219       my ($j, $s, $u) = @$pair;
   220       my $row = $j >> 8;
   221       my $cell = ($j&0xff) - 0x21;
   222       if ( defined($map_table{$row}->[$cell]) && $map_table{$row}->[$cell] != $u ) {
   223          print "conflict!\n";
   224       }
   225       $map_table{$row}->[$cell] = $u;
   226     }
   227   }
   229   for ( my $i = 0; $i < 0x80; $i++ ) {
   230     if ( ($i & 0x7) == 0 ) {
   231       print MAP "\n ";
   232     }
   233     if ( $i >= 0x21 && $i <= 0x7e ) {
   234       my $table_index = get_94table_index($map_table{$i});
   235       printf MAP " %6d,", $table_index;
   236     } else {
   237       print MAP " 0xFFFD,";
   238     }
   239   }
   240 }
   242 sub print_table_index {
   243   my ($map_name, @maps) = @_;
   244   print MAP "static const uint16_t g${map_name}IndexShiftJis[] = {";
   245   print_sjis_table_index(@maps);
   246   print MAP "\n};\n";
   247   print MAP "static const uint16_t g${map_name}IndexJis0208[] = {";
   248   print_jis_table_index(@maps);
   249   print MAP "\n};\n";
   250   print MAP "static const uint16_t * const g${map_name}Index[] = {";
   251   print MAP "\n  g${map_name}IndexShiftJis, g${map_name}IndexJis0208";
   252   print MAP "\n};\n\n";
   253 }
   255 sub print_0212_table_index {
   256   my ($map_name, @maps) = @_;
   257   print MAP "static const uint16_t g${map_name}Index[] = {";
   258   print_jis_table_index(@maps);
   259   print MAP "\n};\n\n";
   260 }
   263 sub print_table {
   264   print MAP "static const uint16_t gJapaneseMap[] = {";
   265   for ( my $i = 0; $i < $table_next_count; $i += 94 ) {
   266     my $index = $i;
   267     print MAP "\n  /* index $index */\n         ";
   268     my $map_table = $table[$i];
   269     my $print_count = 1;
   270     for ( my $j = 0; $j < 94; $j++ ) {
   271       my $u = $map_table->[$j];
   272       if ( $u == 0 ) { $u = 0xfffd; }
   273       printf MAP " 0x%04X,", $u;
   274       if ( ++$print_count == 8 ) {
   275         print MAP "\n ";
   276         $print_count = 0;
   277       }
   278     }
   279   }
   280   print MAP "\n};\n";
   281 }
   284 my %cp932 = read_sjis_map('CP932.TXT', 0, 1);
   285 my %ibm = read_sjis_map('IBM943.TXT', 0, 1);
   286 my $jis0212 = read_0212_map('JIS0212.TXT', 0, 1);
   288 %printed = ();
   289 write_fromu_map('jis0201-uf-unify', 'jis',
   290   $cp932{jis0201},
   291   $ibm{jis0201}
   292 );
   293 write_fromu_map('jis0208-uf-unify', 'jis',
   294   $cp932{jis0208},
   295   $ibm{jis0208}
   296 );
   298 %printed = ();
   299 write_fromu_map('jis0208ext-uf-unify', 'jis',
   300   $cp932{jis0208undef},
   301   $ibm{jis0208undef}
   302 );
   304 %printed = ();
   305 write_fromu_map('sjis-uf-unify', 'sjis',
   306   @cp932{'jis0201', 'jis0208', 'jis0208undef', 'sjis1undef', 'sjis2undef'},
   307   @ibm{'jis0201', 'jis0208', 'jis0208undef', 'sjis1undef', 'sjis2undef'}
   308 );
   310 open MAP, ">japanese.map" or die $!;
   311 binmode MAP;
   313 while (<DATA>) {
   314   if ( /^!/ ) { last; }
   315   print MAP;
   316 }
   317 print MAP "/* generated by jamap.pl @source_files */\n\n";
   318 print MAP <<EOM;
   319 // IE-compatible handling of undefined codepoints:
   320 // 0x80 --> U+0080
   321 // 0xa0 --> U+F8F0
   322 // 0xfd --> U+F8F1
   323 // 0xfe --> U+F8F2
   324 // 0xff --> U+F8F3
   325 EOM
   327 print_table_index('CP932', @cp932{'jis0208', 'jis0208undef', 'sjis2undef'});
   328 print_table_index('IBM943', @ibm{'jis0208', 'jis0208undef', 'sjis2undef'});
   329 print_0212_table_index('JIS0212', $jis0212);
   330 print_table();
   332 close MAP or warn $!;
   334 __DATA__
   335 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   336 /* This Source Code Form is subject to the terms of the Mozilla Public
   337  * License, v. 2.0. If a copy of the MPL was not distributed with this
   338  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   340 !

mercurial