Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | #!/usr/local/bin/perl |
michael@0 | 2 | use strict; |
michael@0 | 3 | |
# Names of the mapping files read so far, in reading order.  The list is
# interpolated into the "generated by" banner of the output file.
my @source_files;

# Lead-byte table, indexed by the high byte of a Shift_JIS code:
#   -1        the code has no lead byte, i.e. it is a single byte (index 0),
#   -2        private-use lead byte (0xf0 .. 0xf9),
#   positive  base value to which the trail-byte value is added,
#   unset     not a lead byte this script converts.
# Each lead byte advances the base by 0x200, i.e. by two rows.
my @sjis_h = (-1);
for my $n (0 .. 30) {
    $sjis_h[0x81 + $n] = 0x2100 + 0x200 * $n;
}
for my $n (0 .. 15) {
    $sjis_h[0xe0 + $n] = 0x5F00 + 0x200 * $n;
}
$sjis_h[$_] = -2 for 0xf0 .. 0xf9;

# Trail-byte table, indexed by the low byte of a Shift_JIS code.  Trail bytes
# 0x40..0x7e and 0x80..0x9e map to cells 0x21..0x7e; trail bytes 0x9f..0xfc
# map to the same cell range plus 0x100, so that adding the value to the
# lead-byte base selects the second of the two rows.  0x7f and everything
# outside 0x40..0xfc stay unset.
my @sjis_l;
$sjis_l[0x40 + $_] = 0x21 + $_  for 0 .. 0x3e;
$sjis_l[0x80 + $_] = 0x60 + $_  for 0 .. 0x1e;
$sjis_l[0x9f + $_] = 0x121 + $_ for 0 .. 0x5d;
michael@0 | 14 | |
# sjis_to_jis($s)
#
# Classify the Shift_JIS code $s (one or two bytes packed into an integer)
# and, for double-byte codes with a known lead byte, convert it to its JIS
# row/cell value using the @sjis_h and @sjis_l tables.
#
# Returns the list ($code, $type) where $type is one of
#   'jis0208'       converted code in rows below 0x29 or in 0x30..0x74,
#   'jis0208undef'  converted code in any other row,
#   'jis0201'       single byte <= 0x7f or in 0xa1..0xdf,
#   'sjis1undef'    any other single byte,
#   'private'       lead byte marked private-use (-2),
#   'sjis2undef'    unknown lead byte, or known lead byte with a bad trail.
# For every type except the two jis0208 ones, $code is $s unchanged.
sub sjis_to_jis {
    my ($s) = @_;

    my $row_base = $sjis_h[ ($s >> 8) & 0xff ];

    if ( $row_base > 0 ) {
        # Double-byte code with a convertible lead byte.
        my $cell = $sjis_l[ $s & 0xff ];
        if ( $cell == 0 ) {
            # Trail byte has no table entry.
            return ($s, 'sjis2undef');
        }
        my $jis      = $row_base + $cell;
        my $is_kanji = $jis >= 0x3000 && $jis < 0x7500;
        return ($jis, ( $is_kanji || $jis < 0x2900 ) ? 'jis0208' : 'jis0208undef');
    }

    if ( $row_base == -1 ) {
        # Single byte: roman half (<= 0x7f) or kana half (0xa1..0xdf).
        my $is_0201 = $s <= 0x7f || ( $s >= 0xa1 && $s <= 0xdf );
        return ($s, $is_0201 ? 'jis0201' : 'sjis1undef');
    }

    # Private-use lead byte, or a lead byte the table does not know.
    return ($s, $row_base == -2 ? 'private' : 'sjis2undef');
}
michael@0 | 61 | |
michael@0 | 62 | |
# read_sjis_map($filename, $s_col, $u_col)
#
# Read a whitespace-separated mapping file.  On each line, column $s_col
# holds a Shift_JIS code and column $u_col the Unicode value, both written
# as 0x-prefixed hexadecimal; lines where either column is missing or is not
# such a number are skipped.
#
# Every accepted pair is classified with sjis_to_jis() and stored as
# [$jis, $sjis, $unicode] under the returned type name.  The file name is
# appended to @source_files.
#
# Returns the hash (as a flat list) mapping type name => array ref of
# triples.  Dies if the file cannot be opened; a failing close only warns.
sub read_sjis_map {
    my ($filename, $s_col, $u_col) = @_;
    my %map;
    my $hex_number = qr/^0x[0-9A-Fa-f]+$/;

    # Three-argument open with a lexical handle: the file name can never be
    # taken for an open mode, and the global MAP output handle is untouched.
    open my $in, '<', $filename or die "Cannot open $filename: $!";
    while ( my $line = <$in> ) {
        my @cols = split /\s+/, $line;
        my ($s, $u) = @cols[$s_col, $u_col];
        next unless defined $s && defined $u;
        next unless $s =~ $hex_number && $u =~ $hex_number;

        # oct() understands the 0x prefix.
        $s = oct($s);
        $u = oct($u);

        my ($j, $type) = sjis_to_jis($s);
        push @{ $map{$type} }, [$j, $s, $u];
    }
    close $in or warn "Cannot close $filename: $!";

    push @source_files, $filename;
    return %map;
}
michael@0 | 83 | |
# read_0212_map($filename, $j_col, $u_col)
#
# Read a whitespace-separated mapping file.  On each line, column $j_col
# holds a JIS code and column $u_col the Unicode value, both written as
# 0x-prefixed hexadecimal; lines where either column is missing or is not
# such a number are skipped.  A Unicode value of 0x007e is replaced by
# 0xff5e.
#
# Each accepted pair is stored as [$jis, 0, $unicode] (the middle slot is the
# Shift_JIS position used by the other readers and is always 0 here).  The
# file name is appended to @source_files.
#
# Returns an array ref of triples; it is an empty array ref, not undef, when
# no line was accepted.  Dies if the file cannot be opened; a failing close
# only warns.
sub read_0212_map {
    my ($filename, $j_col, $u_col) = @_;
    my $map = [];
    my $hex_number = qr/^0x[0-9A-Fa-f]+$/;

    # Three-argument open with a lexical handle: the file name can never be
    # taken for an open mode, and the global MAP output handle is untouched.
    open my $in, '<', $filename or die "Cannot open $filename: $!";
    while ( my $line = <$in> ) {
        my @cols = split /\s+/, $line;
        my ($j, $u) = @cols[$j_col, $u_col];
        next unless defined $j && defined $u;
        next unless $j =~ $hex_number && $u =~ $hex_number;

        # oct() understands the 0x prefix.
        $j = oct($j);
        $u = oct($u);
        $u = 0xff5e if $u == 0x007e;

        push @$map, [$j, 0, $u];
    }
    close $in or warn "Cannot close $filename: $!";

    push @source_files, $filename;
    return $map;
}
michael@0 | 103 | |
michael@0 | 104 | |
# Unicode value => code already written for it.  Deliberately file-level:
# the caller clears it between groups of write_fromu_map() calls, so several
# consecutive calls can share one "already printed" set.
my %printed;

# write_fromu_map($filename, $code, @maps)
#
# Write a "code<TAB>unicode" table to $filename, one line per Unicode value.
# Each map is an array ref of [$jis, $sjis, $unicode] triples; undefined maps
# are skipped.  When $code is 'sjis' the Shift_JIS member of the triple is
# written, otherwise the JIS member.  Codes below 0x100 are printed with two
# hex digits, all others with four.
#
# A Unicode value is written only the first time it is seen (tracked in
# %printed).  If it shows up again with a different code, a "conflict" line
# is printed to standard output and the first mapping is kept.
#
# Dies if the file cannot be opened; a failing close only warns.
sub write_fromu_map {
    my ($filename, $code, @maps) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # taken for an open mode, and the global MAP output handle is untouched.
    open my $out, '>', $filename or die "Cannot open $filename for writing: $!";

    foreach my $map (@maps) {
        next unless defined $map;    # no entries of this type were read
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            $j = $s if $code eq 'sjis';

            if ( defined $printed{$u} ) {
                # Both values are integers, so compare them numerically.
                if ( $printed{$u} != $j ) {
                    printf "conflict 0x%04x to 0x%04x, 0x%04x\n", $u, $printed{$u}, $j;
                }
                next;
            }

            my $format = $j < 0x100 ? "0x%02X\t0x%04X\n" : "0x%04X\t0x%04X\n";
            printf {$out} $format, $j, $u;
            $printed{$u} = $j;
        }
    }

    close $out or warn "Cannot close $filename: $!";
}
michael@0 | 131 | |
# Registry of 94-cell rows that will be emitted into one flat array.
my @table;                   # start index => array ref holding that row
my %table;                   # comma-joined cell values => start index
my $table_next_count = 0;    # start index the next new row will receive

# get_94table_index($map_table)
#
# Return the start index of the 94-cell row $map_table inside the flat
# table, registering the row first if no row with the same 94 integer cell
# values is known yet.  Missing cells count as 0 when rows are compared.
sub get_94table_index {
    my ($map_table) = @_;

    my @cells;
    foreach my $pos (0 .. 93) {
        push @cells, int($map_table->[$pos]);
    }
    my $fingerprint = join ',', @cells;

    my $known = $table{$fingerprint};
    return $known if defined $known;

    # New row: claim the next 94 slots.
    my $slot = $table_next_count;
    $table_next_count += 94;
    $table[$slot]        = $map_table;
    $table{$fingerprint} = $slot;
    return $slot;
}
michael@0 | 148 | |
# get_188table_index($map_table)
#
# Return the start index of the 188-cell row $map_table inside the flat
# table.  A new row is stored as two consecutive 94-cell halves; each half is
# also registered under its own key (unless a row with those values is
# already known), so that a later 94-cell lookup with the same contents can
# share it.  Missing cells count as 0 when rows are compared.
#
# An undefined $map_table (a lead byte for which no mapping was read) is
# treated as an empty row explicitly, instead of depending on how Perl
# handles slicing an undefined reference under "use strict".
sub get_188table_index {
    my ($map_table) = @_;
    $map_table = [] unless defined $map_table;

    my $map_table1 = [ @{$map_table}[0 .. 93] ];
    my $map_table2 = [ @{$map_table}[94 .. 187] ];

    my $key1 = join ',', map { int($_ || 0) } @$map_table1;
    my $key2 = join ',', map { int($_ || 0) } @$map_table2;
    # Same string as joining all 188 cells in one go.
    my $key  = "$key1,$key2";

    my $table_index = $table{$key};
    if ( !defined($table_index) ) {
        $table_index = $table_next_count;
        $table_next_count += 188;
        $table[$table_index]      = $map_table1;
        $table[$table_index + 94] = $map_table2;
        $table{$key}  = $table_index;
        $table{$key1} = $table_index      unless defined($table{$key1});
        $table{$key2} = $table_index + 94 unless defined($table{$key2});
    }
    return $table_index;
}

# Register the all-empty row before anything else.  The index counter starts
# at 0, so empty 188-cell rows, and empty 94-cell rows through the half key,
# all resolve to index 0.
get_188table_index([]);
michael@0 | 170 | |
# print_sjis_table_index(@maps)
#
# Print to the MAP handle the 128 comma-separated index entries for bytes
# 0x80..0xff, eight per line.  Each map is an array ref of
# [$jis, $sjis, $unicode] triples.
#
# The triples are first gathered into one 188-cell row per lead byte (trail
# bytes 0x40..0x7e and 0x80..0xfc, packed without the gap at 0x7f; other
# trail bytes are ignored).  "conflict!" is printed to standard output when
# two triples put different values into the same cell; the later one wins.
#
# The entry printed for a byte is then:
#   0xa1..0xdf   the byte plus 0xfec0,
#   0xf0..0xf9   0xe000 plus 188 for every step above 0xf0,
#   0x80         0xFFFD,
#   0xa0         0xF8F0,
#   0xfd..0xff   0xF8F1..0xF8F3,
#   otherwise    the decimal index from get_188table_index() for its row.
sub print_sjis_table_index {
    my @maps = @_;

    my %rows;    # lead byte => array ref of up to 188 Unicode values
    foreach my $map (@maps) {
        foreach my $entry (@$map) {
            my (undef, $sjis, $ucs) = @$entry;
            my $lead  = $sjis >> 8;
            my $trail = $sjis & 0xff;

            # Only 0x40..0x7e and 0x80..0xfc are usable trail bytes.
            next if $trail < 0x40 || $trail == 0x7f || $trail > 0xfc;
            $trail -= ( $trail <= 0x7e ) ? 0x40 : 0x41;

            my $cells = $rows{$lead} ||= [];
            if ( defined $cells->[$trail] && $cells->[$trail] != $ucs ) {
                print "conflict!\n";
            }
            $cells->[$trail] = $ucs;
        }
    }

    foreach my $byte (0x80 .. 0xff) {
        print MAP "\n " unless $byte & 0x7;

        if ( $byte >= 0xa1 && $byte <= 0xdf ) {
            printf MAP " 0x%04X,", $byte + 0xfec0;
        }
        elsif ( $byte >= 0xf0 && $byte <= 0xf9 ) {
            printf MAP " 0x%04X,", 0xe000 + ($byte - 0xf0) * 188;
        }
        elsif ( $byte == 0x80 ) {
            print MAP " 0xFFFD,";
        }
        elsif ( $byte == 0xa0 ) {
            print MAP " 0xF8F0,";
        }
        elsif ( $byte >= 0xfd ) {
            printf MAP " 0x%04X,", $byte + (0xf8f1 - 0xfd);
        }
        else {
            printf MAP " %6d,", get_188table_index($rows{$byte});
        }
    }
}
michael@0 | 213 | |
# print_jis_table_index(@maps)
#
# Print to the MAP handle the 128 comma-separated index entries for row
# bytes 0x00..0x7f, eight per line.  Each map is an array ref of
# [$jis, $sjis, $unicode] triples; undefined maps are skipped.
#
# The triples are first gathered into one row per high byte of the JIS code,
# with the cell taken from the low byte minus 0x21.  Triples whose low byte
# is below 0x21 are ignored: the resulting negative cell would be resolved
# by Perl from the end of the row (overwriting an unrelated cell) or, for a
# still-empty row, abort the script.  "conflict!" is printed to standard
# output when two triples put different values into the same cell; the later
# one wins.
#
# Rows 0x21..0x7e print the decimal index from get_94table_index(); every
# other row prints 0xFFFD.
sub print_jis_table_index {
    my @maps = @_;
    my %map_table;
    foreach my $map (@maps) {
        next unless defined $map;    # no entries of this type were read
        foreach my $pair (@$map) {
            my ($j, undef, $u) = @$pair;
            my $row  = $j >> 8;
            my $cell = ( $j & 0xff ) - 0x21;
            next if $cell < 0;       # low byte below 0x21: not a JIS cell

            if ( defined($map_table{$row}->[$cell]) && $map_table{$row}->[$cell] != $u ) {
                print "conflict!\n";
            }
            $map_table{$row}->[$cell] = $u;
        }
    }

    for my $i (0 .. 0x7f) {
        if ( ($i & 0x7) == 0 ) {
            print MAP "\n ";
        }
        if ( $i >= 0x21 && $i <= 0x7e ) {
            my $table_index = get_94table_index($map_table{$i});
            printf MAP " %6d,", $table_index;
        }
        else {
            print MAP " 0xFFFD,";
        }
    }
}
michael@0 | 241 | |
# print_table_index($map_name, @maps)
#
# Print to the MAP handle three C definitions for the mapping $map_name:
# the g<name>IndexShiftJis array (filled by print_sjis_table_index), the
# g<name>IndexJis0208 array (filled by print_jis_table_index), and the
# g<name>Index array holding pointers to those two.
sub print_table_index {
    my ($map_name, @maps) = @_;

    print MAP "static const uint16_t g${map_name}IndexShiftJis[] = {";
    print_sjis_table_index(@maps);

    # Close the first array and open the second one.
    print MAP "\n};\n"
            . "static const uint16_t g${map_name}IndexJis0208[] = {";
    print_jis_table_index(@maps);

    # Close the second array and emit the pointer pair.
    print MAP "\n};\n"
            . "static const uint16_t * const g${map_name}Index[] = {"
            . "\n g${map_name}IndexShiftJis, g${map_name}IndexJis0208"
            . "\n};\n\n";
}
michael@0 | 254 | |
# print_0212_table_index($map_name, @maps)
#
# Print to the MAP handle the C definition of the g<name>Index array, with
# its entries produced by print_jis_table_index() from @maps.
sub print_0212_table_index {
    my ($map_name, @maps) = @_;

    my $opening = "static const uint16_t g${map_name}Index[] = {";
    my $closing = "\n};\n\n";

    print MAP $opening;
    print_jis_table_index(@maps);
    print MAP $closing;
}
michael@0 | 261 | |
michael@0 | 262 | |
# print_table()
#
# Print to the MAP handle the C definition of gJapaneseMap: every registered
# 94-cell row from @table, in index order, each introduced by an
# "index N" comment.  Cells that are 0 or missing are written as 0xFFFD.
# The first output line of a row holds seven values and the following lines
# eight, because the per-line counter starts at 1.
sub print_table {
    print MAP "static const uint16_t gJapaneseMap[] = {";

    my $index = 0;
    while ( $index < $table_next_count ) {
        print MAP "\n /* index $index */\n ";

        my $cells   = $table[$index];
        my $on_line = 1;
        foreach my $pos (0 .. 93) {
            my $ucs = $cells->[$pos];
            $ucs = 0xfffd if $ucs == 0;
            printf MAP " 0x%04X,", $ucs;

            $on_line++;
            if ( $on_line == 8 ) {
                print MAP "\n ";
                $on_line = 0;
            }
        }

        $index += 94;
    }

    print MAP "\n};\n";
}
michael@0 | 282 | |
michael@0 | 283 | |
# ---- Read the source mapping files ------------------------------------
my %cp932   = read_sjis_map('CP932.TXT', 0, 1);
my %ibm     = read_sjis_map('IBM943.TXT', 0, 1);
my $jis0212 = read_0212_map('JIS0212.TXT', 0, 1);

# ---- Write the from-Unicode tables ------------------------------------
# %printed records which Unicode values have already been written; it is
# cleared between groups.  The jis0201 and jis0208 files share one set.
%printed = ();
write_fromu_map('jis0201-uf-unify', 'jis', $cp932{jis0201}, $ibm{jis0201});
write_fromu_map('jis0208-uf-unify', 'jis', $cp932{jis0208}, $ibm{jis0208});

%printed = ();
write_fromu_map('jis0208ext-uf-unify', 'jis',
    $cp932{jis0208undef},
    $ibm{jis0208undef}
);

%printed = ();
my @sjis_types = qw(jis0201 jis0208 jis0208undef sjis1undef sjis2undef);
write_fromu_map('sjis-uf-unify', 'sjis', @cp932{@sjis_types}, @ibm{@sjis_types});

# ---- Write the to-Unicode tables --------------------------------------
# MAP stays a bareword handle because the print_* routines write to it by
# name.  Three-argument open keeps the mode separate from the file name.
open MAP, '>', 'japanese.map' or die "Cannot open japanese.map: $!";
binmode MAP;

# Copy the header template from the DATA section, up to the "!" line.
while ( my $line = <DATA> ) {
    last if $line =~ /^!/;
    print MAP $line;
}
print MAP "/* generated by jamap.pl @source_files */\n\n";
print MAP <<EOM;
// IE-compatible handling of undefined codepoints:
// 0x80 --> U+0080
// 0xa0 --> U+F8F0
// 0xfd --> U+F8F1
// 0xfe --> U+F8F2
// 0xff --> U+F8F3
EOM

my @index_types = qw(jis0208 jis0208undef sjis2undef);
print_table_index('CP932',  @cp932{@index_types});
print_table_index('IBM943', @ibm{@index_types});
print_0212_table_index('JIS0212', $jis0212);
print_table();

close MAP or warn "Cannot close japanese.map: $!";
michael@0 | 333 | |
michael@0 | 334 | __DATA__ |
michael@0 | 335 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 336 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 337 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 338 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 339 | |
michael@0 | 340 | ! |