#!/usr/local/bin/perl
use strict;
use warnings;
# The sparse lookup tables below deliberately rely on undef behaving as 0
# in numeric comparisons, int() and printf.
no warnings 'uninitialized';

# Generates the Japanese converter tables from CP932.TXT, IBM943.TXT and
# JIS0212.TXT (all read from the current directory):
#   jis0201-uf-unify, jis0208-uf-unify, jis0208ext-uf-unify, sjis-uf-unify
#       "code<TAB>unicode" lists, one pair per line
#   japanese.map
#       C index arrays plus the shared gJapaneseMap array

# Names of all input files read so far; echoed into japanese.map.
my @source_files;

# Indexed by the high byte of a Shift_JIS code:
#   -1        entry 0, i.e. the code fits in a single byte
#   > 0       base value added to the @sjis_l entry (lead bytes 0x81..0x9f
#             and 0xe0..0xef)
#   -2        lead bytes 0xf0..0xf9, reported as 'private'
#   undef     every other lead byte
my @sjis_h;
$sjis_h[0] = -1;
@sjis_h[0x81..0x9f] = map { 0x2100 + $_ * 0x200 } (0 .. 30);
@sjis_h[0xe0..0xef] = map { 0x5F00 + $_ * 0x200 } (0 .. 15);
@sjis_h[0xf0..0xf9] = (-2) x 10;

# Indexed by the low byte of a Shift_JIS code.  Trail bytes 0x9f..0xfc map
# to 0x121..0x17e, so adding them to the base also bumps the high byte by 1.
# Bytes outside 0x40..0x7e / 0x80..0xfc have no entry (undef).
my @sjis_l;
@sjis_l[0x40..0x7e] = (0x21..0x5f);
@sjis_l[0x80..0xfc] = (0x60..0x7e, 0x121..0x17e);

# Classify a Shift_JIS code and convert it to a JIS code where possible.
#
#   $s       numeric Shift_JIS code (one or two bytes)
#
# Returns ($j, $type).  $j is the converted code for the types 'jis0208'
# and 'jis0208undef'; for every other type $j is $s unchanged.
# $type is one of:
#   'jis0208'       converted code is < 0x2900 or in 0x3000..0x74ff
#   'jis0208undef'  converted code is outside those ranges
#   'jis0201'       single byte <= 0x7f or in 0xa1..0xdf
#   'sjis1undef'    any other single byte
#   'private'       lead byte 0xf0..0xf9
#   'sjis2undef'    lead or trail byte without a table entry
sub sjis_to_jis {
    my ($s) = @_;
    my $j;
    my $type;

    my $row_base = $sjis_h[($s >> 8) & 0xff];

    if ( $row_base > 0 ) {                      # jis0208

        my $low = $sjis_l[$s & 0xff];
        if ( $low == 0 ) {                      # trail byte has no entry
            $j = $s;
            $type = 'sjis2undef';
        } else {
            $j = $row_base + $low;
            if ( $j >= 0x3000 && $j < 0x7500 ) {    # jis0208 kanji
                $type = 'jis0208';
            } elsif ( $j < 0x2900 ) {               # jis0208
                $type = 'jis0208';
            } else {
                $type = 'jis0208undef';
            }
        }

    } elsif ( $row_base == -1 ) {               # single byte

        $j = $s;
        if ( $s <= 0x7f ) {                     # jis0201 roman
            $type = 'jis0201';
        } elsif ( $s >= 0xa1 && $s <= 0xdf ) {  # jis0201 kana
            $type = 'jis0201';
        } else {                                # sjis single byte undefined
            $type = 'sjis1undef';
        }

    } elsif ( $row_base == -2 ) {               # private use
        $j = $s;
        $type = 'private';

    } else {                                    # sjis undefined
        $j = $s;
        $type = 'sjis2undef';
    }

    return ($j, $type);
}


# Read a whitespace-separated Shift_JIS mapping file.
#
#   $filename   file to read
#   $s_col      0-based column holding the Shift_JIS code ("0x...")
#   $u_col      0-based column holding the Unicode value ("0x...")
#
# Lines whose two selected columns are not both 0x-prefixed hex numbers are
# skipped.  Returns a flattened hash: type (see sjis_to_jis) => array ref of
# [$j, $s, $u] triples.  Dies if the file cannot be opened.
sub read_sjis_map {
    my ($filename, $s_col, $u_col) = @_;
    my %map;
    open my $in, '<', $filename or die "cannot open $filename: $!";
    while (<$in>) {
        my @cols = split /\s+/;
        my ($s, $u) = @cols[$s_col, $u_col];
        $s =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/ or next;

        # oct() understands the "0x" prefix.
        $s = oct($s);
        $u = oct($u);

        my ($j, $type) = sjis_to_jis($s);
        push @{$map{$type}}, [$j, $s, $u];
    }
    close $in or warn $!;
    push @source_files, $filename;
    return %map;
}

# Read a whitespace-separated JIS X 0212 mapping file.
#
#   $filename   file to read
#   $j_col      0-based column holding the JIS code ("0x...")
#   $u_col      0-based column holding the Unicode value ("0x...")
#
# Returns an array ref of [$j, 0, $u] triples (the Shift_JIS slot is always
# 0).  A Unicode value of 0x007e is replaced by 0xff5e.  Dies if the file
# cannot be opened.
sub read_0212_map {
    my ($filename, $j_col, $u_col) = @_;
    my $map;
    open my $in, '<', $filename or die "cannot open $filename: $!";
    while (<$in>) {
        my @cols = split /\s+/;
        my ($j, $u) = @cols[$j_col, $u_col];
        $j =~ /^0x[0-9A-Fa-f]+$/ && $u =~ /^0x[0-9A-Fa-f]+$/ or next;

        $j = oct($j);
        $u = oct($u);
        $u = 0xff5e if $u == 0x007e;

        push @$map, [$j, 0, $u];
    }
    close $in or warn $!;
    push @source_files, $filename;
    return $map;
}


# Unicode value => code already written by write_fromu_map.  The main script
# resets this between groups of output files.
my %printed;

# Write a "code<TAB>unicode" file.
#
#   $filename   output file (truncated)
#   $code       'sjis' writes the Shift_JIS code; anything else writes $j
#   @maps       array refs of [$j, $s, $u] triples, processed in order
#
# Only the first code seen for a Unicode value (per %printed) is written; a
# later, different code for the same value is reported on STDOUT instead.
sub write_fromu_map {
    my ($filename, $code, @maps) = @_;
    open my $out, '>', $filename or die "cannot open $filename: $!";
    foreach my $map (@maps) {
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            if ( $code eq 'sjis' ) {
                $j = $s;
            }
            if ( defined($printed{$u}) ) {
                if ( $printed{$u} ne $j ) {
                    printf "conflict 0x%04x to 0x%04x, 0x%04x\n", $u, $printed{$u}, $j;
                }
            } else {
                if ( $j < 0x100 ) {
                    printf $out "0x%02X\t0x%04X\n", $j, $u;
                } else {
                    printf $out "0x%04X\t0x%04X\n", $j, $u;
                }
                $printed{$u} = $j;
            }
        }
    }
    close $out or warn $!;
}

# Pool of 94-entry tables that print_table() writes out as gJapaneseMap.
#   @table             start index => array ref holding the 94 values
#   %table             comma-joined table contents => start index
#   $table_next_count  start index the next new table will receive
my @table;
my %table;
my $table_next_count = 0;

# Return the start index of a 94-entry table, adding it to the pool when no
# table with identical contents is registered yet.  Missing entries count
# as 0.
sub get_94table_index {
    my ($map_table) = @_;
    my $key = join ',', map {int($map_table->[$_])} (0 .. 93);
    my $table_index = $table{$key};
    if ( !defined($table_index) ) {
        $table_index = $table_next_count;
        $table_next_count += 94;
        $table[$table_index] = $map_table;
        $table{$key} = $table_index;
    }
    return $table_index;
}

# Return the start index of a 188-entry table, stored as two consecutive
# 94-entry halves.  When the table is new, each half is also registered
# under its own key (unless that key already exists) so get_94table_index
# can reuse it.
sub get_188table_index {
    my ($map_table) = @_;
    my $map_table1 = [ @{$map_table}[0 .. 93] ];
    my $map_table2 = [ @{$map_table}[94 .. 187] ];
    my $key  = join ',', map {int($map_table->[$_])} (0 .. 187);
    my $key1 = join ',', map {int($map_table1->[$_])} (0 .. 93);
    my $key2 = join ',', map {int($map_table2->[$_])} (0 .. 93);
    my $table_index = $table{$key};
    if ( !defined($table_index) ) {
        $table_index = $table_next_count;
        $table_next_count += 188;
        $table[$table_index] = $map_table1;
        $table[$table_index + 94] = $map_table2;
        $table{$key} = $table_index;
        $table{$key1} = $table_index unless defined($table{$key1});
        $table{$key2} = $table_index + 94 unless defined($table{$key2});
    }
    return $table_index;
}

# Register the all-empty table first so that it gets index 0.
get_188table_index([]);

# Print the 128 index entries for Shift_JIS lead bytes 0x80..0xff to MAP,
# eight per line.  @maps are array refs of [$j, $s, $u] triples; triples
# whose trail byte is outside 0x40..0x7e / 0x80..0xfc are ignored.
# The caller must have MAP open for writing.
sub print_sjis_table_index {
    my @maps = @_;
    my %map_table;
    foreach my $map (@maps) {
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            my $row = $s >> 8;
            my $cell = $s & 0xff;
            # Pack the two trail byte ranges into 0..62 and 63..187.
            if ( $cell >= 0x40 && $cell <= 0x7e ) {
                $cell -= 0x40;
            } elsif ( $cell >= 0x80 && $cell <= 0xfc ) {
                $cell -= 0x41;
            } else {
                next;
            }
            if ( defined($map_table{$row}->[$cell]) && $map_table{$row}->[$cell] != $u ) {
                print "conflict!\n";
            }
            $map_table{$row}->[$cell] = $u;
        }
    }

    for ( my $i = 0x80; $i < 0x100; $i++ ) {
        if ( ($i & 0x7) == 0 ) {
            print MAP "\n ";
        }
        if ( $i >= 0xa1 && $i <= 0xdf ) {
            # Single bytes 0xa1..0xdf: fixed offset, giving 0xFF61..0xFF9F.
            printf MAP " 0x%04X,", $i + 0xfec0;
        } elsif ( $i >= 0xf0 && $i <= 0xf9 ) {
            # 188 consecutive values per lead byte, starting at 0xE000.
            printf MAP " 0x%04X,", 0xe000 + ($i - 0xf0) * 188;
        } elsif ( $i == 0x80 ) {
            # NOTE(review): the comment block written to japanese.map says
            # "0x80 --> U+0080" while this entry is 0xFFFD; confirm that
            # the consumer special-cases 0x80.
            print MAP " 0xFFFD,";
        } elsif ( $i == 0xa0 ) {
            print MAP " 0xF8F0,";
        } elsif ( $i >= 0xfd ) {
            # 0xfd..0xff give 0xF8F1..0xF8F3.
            printf MAP " 0x%04X,", $i + (0xf8f1 - 0xfd);
        } else {
            my $table_index = get_188table_index($map_table{$i});
            printf MAP " %6d,", $table_index;
        }
    }
}

# Print the 128 index entries for JIS rows 0x00..0x7f to MAP, eight per
# line.  Rows 0x21..0x7e get the pool index of their 94-entry table; every
# other row gets 0xFFFD.  @maps are array refs of [$j, $s, $u] triples.
# The caller must have MAP open for writing.
sub print_jis_table_index {
    my @maps = @_;
    my %map_table;
    foreach my $map (@maps) {
        foreach my $pair (@$map) {
            my ($j, $s, $u) = @$pair;
            my $row = $j >> 8;
            my $cell = ($j & 0xff) - 0x21;
            if ( defined($map_table{$row}->[$cell]) && $map_table{$row}->[$cell] != $u ) {
                print "conflict!\n";
            }
            $map_table{$row}->[$cell] = $u;
        }
    }

    for ( my $i = 0; $i < 0x80; $i++ ) {
        if ( ($i & 0x7) == 0 ) {
            print MAP "\n ";
        }
        if ( $i >= 0x21 && $i <= 0x7e ) {
            my $table_index = get_94table_index($map_table{$i});
            printf MAP " %6d,", $table_index;
        } else {
            print MAP " 0xFFFD,";
        }
    }
}

# Print the g<name>IndexShiftJis and g<name>IndexJis0208 arrays and the
# g<name>Index array that points at both.
sub print_table_index {
    my ($map_name, @maps) = @_;
    print MAP "static const uint16_t g${map_name}IndexShiftJis[] = {";
    print_sjis_table_index(@maps);
    print MAP "\n};\n";
    print MAP "static const uint16_t g${map_name}IndexJis0208[] = {";
    print_jis_table_index(@maps);
    print MAP "\n};\n";
    print MAP "static const uint16_t * const g${map_name}Index[] = {";
    print MAP "\n g${map_name}IndexShiftJis, g${map_name}IndexJis0208";
    print MAP "\n};\n\n";
}

# Print the single g<name>Index array used for a JIS-only map.
sub print_0212_table_index {
    my ($map_name, @maps) = @_;
    print MAP "static const uint16_t g${map_name}Index[] = {";
    print_jis_table_index(@maps);
    print MAP "\n};\n\n";
}


# Print every pooled 94-entry table as one gJapaneseMap array.  Empty
# entries are written as 0xFFFD.
sub print_table {
    print MAP "static const uint16_t gJapaneseMap[] = {";
    for ( my $i = 0; $i < $table_next_count; $i += 94 ) {
        my $index = $i;
        print MAP "\n /* index $index */\n ";
        my $map_table = $table[$i];
        # The counter starts at 1, so the first row of each table holds
        # seven values and the following rows eight.  Kept as-is so the
        # generated file does not change.
        my $print_count = 1;
        for ( my $j = 0; $j < 94; $j++ ) {
            my $u = $map_table->[$j];
            if ( $u == 0 ) { $u = 0xfffd; }
            printf MAP " 0x%04X,", $u;
            if ( ++$print_count == 8 ) {
                print MAP "\n ";
                $print_count = 0;
            }
        }
    }
    print MAP "\n};\n";
}


my %cp932 = read_sjis_map('CP932.TXT', 0, 1);
my %ibm = read_sjis_map('IBM943.TXT', 0, 1);
my $jis0212 = read_0212_map('JIS0212.TXT', 0, 1);

# The jis0201 and jis0208 files share one %printed set.
%printed = ();
write_fromu_map('jis0201-uf-unify', 'jis',
    $cp932{jis0201},
    $ibm{jis0201}
);
write_fromu_map('jis0208-uf-unify', 'jis',
    $cp932{jis0208},
    $ibm{jis0208}
);

%printed = ();
write_fromu_map('jis0208ext-uf-unify', 'jis',
    $cp932{jis0208undef},
    $ibm{jis0208undef}
);

%printed = ();
write_fromu_map('sjis-uf-unify', 'sjis',
    @cp932{'jis0201', 'jis0208', 'jis0208undef', 'sjis1undef', 'sjis2undef'},
    @ibm{'jis0201', 'jis0208', 'jis0208undef', 'sjis1undef', 'sjis2undef'}
);

# MAP stays a package filehandle because all print_* subs write to it.
open MAP, '>', 'japanese.map' or die "cannot open japanese.map: $!";
binmode MAP;

# Copy the header template from the __DATA__ section up to the "!" line.
while (<DATA>) {
    if ( /^!/ ) { last; }
    print MAP;
}
print MAP "/* generated by jamap.pl @source_files */\n\n";
# NOTE(review): the start of this here-document was missing from the copy
# of the file under review; only the lines below could be recovered.
# Compare against the upstream file in case a heading line preceded them.
print MAP <<EOM;
// 0x80 --> U+0080
// 0xa0 --> U+F8F0
// 0xfd --> U+F8F1
// 0xfe --> U+F8F2
// 0xff --> U+F8F3
EOM

print_table_index('CP932', @cp932{'jis0208', 'jis0208undef', 'sjis2undef'});
print_table_index('IBM943', @ibm{'jis0208', 'jis0208undef', 'sjis2undef'});
print_0212_table_index('JIS0212', $jis0212);
print_table();

close MAP or warn $!;

__DATA__
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

!