#!/usr/bin/env perl

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# This tool is used to extract "special" (one-to-many) case mappings
# into a form that can be used by nsTextRunTransformations.

use strict;
use warnings;

# Exactly two arguments are required: UnicodeData.txt and SpecialCasing.txt.
if (@ARGV != 2) {
  print <<__EOT;
# Run this tool using a command line of the form
#
#     perl genSpecialCasingData.pl UnicodeData.txt SpecialCasing.txt
#
# The nsSpecialCasingData.cpp file will be written to standard output.
#
# This tool will also write up-to-date versions of the test files
#     all-{upper,lower,title}.html
# and corresponding -ref files in the current directory.
#
__EOT
  exit 0;
}

# Simple (one-to-one) case mappings, keyed by numeric code point; the values
# are hex code point strings. The one-to-many mappings are merged in later.
my %allLower;
my %allUpper;
my %allTitle;
# Canonical decomposition sequence ("XXXX YYYY") => composed char (hex string).
my %compositions;
# General category of each code point.
my %gc;

open my $unicodeData, '<', $ARGV[0]
  or die "can't open $ARGV[0] (should be UnicodeData.txt): $!\n";
while (my $line = <$unicodeData>) {
  chomp $line;
  # Limit -1 keeps trailing empty fields, so all 15 columns are defined.
  my @fields = split /;/, $line, -1;
  next unless @fields >= 15;
  # Skip entries whose name is a <...> placeholder (controls, range markers).
  next if ($fields[1] =~ /</);
  # NOTE(review): the remainder of this loop was lost from the copy of the
  # file under review. It is reconstructed from how the hashes are consumed
  # later and from the UnicodeData.txt field layout (2 = general category,
  # 5 = decomposition, 12/13/14 = simple upper/lower/title mapping).
  # Confirm against the version-controlled original.
  my $usv = hex $fields[0];
  $gc{$usv} = $fields[2];
  $allUpper{$usv} = $fields[12] if $fields[12] ne '';
  $allLower{$usv} = $fields[13] if $fields[13] ne '';
  $allTitle{$usv} = $fields[14] if $fields[14] ne '';
  # Only multi-character canonical decompositions are of interest;
  # compatibility decompositions carry a <tag> and are skipped.
  my $decomp = $fields[5];
  next if $decomp eq '' or $decomp =~ /</ or $decomp !~ / /;
  $compositions{$decomp} = sprintf("%04X", $usv);
}
close $unicodeData;

# One-to-many mappings from SpecialCasing.txt, keyed by numeric code point.
my %specialLower;
my %specialUpper;
my %specialTitle;
# Trailing comment of each SpecialCasing.txt entry (the character name).
my %charName;
# Version/date header lines copied from SpecialCasing.txt into the output.
my @headerLines;

open my $specialCasing, '<', $ARGV[1]
  or die "can't open $ARGV[1] (should be SpecialCasing.txt): $!\n";
while (my $line = <$specialCasing>) {
  chomp $line;
  # Capture the comment only when the match succeeds, so that a line without
  # '#' can never pick up a stale $1 from an earlier match.
  my $comment = ($line =~ m/#\s*(.+)$/) ? $1 : '';
  if ($comment =~ /^(SpecialCasing-|Date:)/) {
    push @headerLines, $comment;
    next;
  }
  $line =~ s/#.*//;
  $line =~ s/;\s*$//;
  next if $line eq '';
  my @fields = split /; */, $line;
  # Unconditional entries have exactly four fields (code; lower; title;
  # upper); entries with a trailing condition list are ignored.
  next unless (scalar @fields) == 4;
  my $usv = hex $fields[0];
  addIfSpecial(\%specialLower, $usv, $fields[1]);
  addIfSpecial(\%specialTitle, $usv, $fields[2]);
  addIfSpecial(\%specialUpper, $usv, $fields[3]);
  $charName{$usv} = $comment;
}
close $specialCasing;

# NOTE(review): the <stdlib.h> include and the static_cast<...> template
# arguments below were missing from the copy under review; they are restored
# from the surrounding C++ types. Confirm against the original.
print <<__END__;
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Auto-generated from files in the Unicode Character Database
   by genSpecialCasingData.pl - do not edit! */

#include "nsSpecialCasingData.h"
#include "mozilla/Util.h" // for ArrayLength
#include <stdlib.h> // for bsearch

__END__
print "/* $_ */\n" for @headerLines;

print <<__END__;

using mozilla::unicode::MultiCharMapping;

__END__

printMappings('Lower', \%specialLower);
printMappings('Upper', \%specialUpper);
printMappings('Title', \%specialTitle);

print <<__END__;
static int CompareMCM(const void* aKey, const void* aElement)
{
  const uint32_t ch = *static_cast<const uint32_t*>(aKey);
  const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
  return int(ch) - int(mcm->mOriginalChar);
}

#define MAKE_SPECIAL_CASE_ACCESSOR(which) \\
  const MultiCharMapping* \\
  Special##which(uint32_t aChar) \\
  { \\
    const void* p = bsearch(&aChar, CaseSpecials_##which, \\
                            mozilla::ArrayLength(CaseSpecials_##which), \\
                            sizeof(MultiCharMapping), CompareMCM); \\
    return static_cast<const MultiCharMapping*>(p); \\
  }

namespace mozilla {
namespace unicode {

MAKE_SPECIAL_CASE_ACCESSOR(Lower)
MAKE_SPECIAL_CASE_ACCESSOR(Upper)
MAKE_SPECIAL_CASE_ACCESSOR(Title)

} // namespace unicode
} // namespace mozilla
__END__

# Merge the one-to-many mappings into the simple ones, so that the generated
# tests exercise every character that has any case mapping.
addSpecialsTo(\%allLower, \%specialLower);
addSpecialsTo(\%allUpper, \%specialUpper);
addSpecialsTo(\%allTitle, \%specialTitle);

# Font referenced by the generated test pages (path relative to the tests).
my $testFont = "../fonts/dejavu-sans/DejaVuSans.ttf";
genTest('lower', \%allLower);
genTest('upper', \%allUpper);
genTitleTest();

# Print one C++ MultiCharMapping table to standard output.
#   $whichMapping - table suffix ('Lower', 'Upper' or 'Title')
#   $hash         - ref to hash: numeric code point => "XXXX YYYY [ZZZZ]"
# Each entry has three character slots; unused slots are written as 0.
sub printMappings {
  my ($whichMapping, $hash) = @_;
  print "static const MultiCharMapping CaseSpecials_${whichMapping}[] = {\n";
  foreach my $key (sort { $a <=> $b } keys %$hash) {
    my @chars = split(/ /, $hash->{$key});
    # The table can hold only three characters; report anything dropped
    # instead of truncating silently.
    warn sprintf("U+%04X: %s mapping has %d characters, only 3 are emitted\n",
                 $key, $whichMapping, scalar @chars) if @chars > 3;
    my @slots = map { defined $chars[$_] ? hex $chars[$_] : 0 } 0 .. 2;
    printf "  { 0x%04x, {0x%04x, 0x%04x, 0x%04x} }, // %s\n", $key, @slots,
           defined $charName{$key} ? $charName{$key} : '';
  }
  print "};\n\n";
  return;
}

# Record $mapping for code point $usv in %$hash, but only when it is a
# one-to-many mapping (i.e. contains a space).
#   $hash    - ref to the hash that collects the special mappings
#   $usv     - numeric code point
#   $mapping - space-separated hex code points from SpecialCasing.txt
sub addIfSpecial {
  my ($hash, $usv, $mapping) = @_;
  return unless $mapping =~ / /;
  # only do compositions that start with the initial char
  # Keys are visited in sorted order so the output is reproducible from run
  # to run; \Q..\E keeps the key literal, and the lookahead stops a key from
  # matching only part of a longer hex code point.
  foreach my $decomp (sort keys %compositions) {
    $mapping =~ s/^\Q$decomp\E(?![0-9A-Fa-f])/$compositions{$decomp}/;
  }
  $hash->{$usv} = $mapping;
  return;
}

# Copy every entry of %$specials into %$hash, overriding existing entries.
sub addSpecialsTo {
  my ($hash, $specials) = @_;
  @$hash{keys %$specials} = values %$specials;
  return;
}

# Return the HTML comment naming code point $usv, or '' when no name is known.
sub _charComment {
  my ($usv) = @_;
  return '' unless exists $charName{$usv};
  my $name = defined $charName{$usv} ? $charName{$usv} : '';
  return " <!-- $name -->";
}

# Turn "XXXX YYYY" into the character references "&#xXXXX;&#xYYYY;".
sub _entities {
  my ($mapping) = @_;
  return join('', map { sprintf("&#x%s;", $_) } split(/ /, $mapping));
}

# Write one test page.
#   $fileName  - file to create in the current directory
#   $transform - value for CSS text-transform, or '' for a reference page
#   $lines     - ref to array of body lines (written one per line)
# Dies if the file cannot be created or written.
# NOTE(review): the HTML scaffolding was lost from the copy of the file under
# review; this markup is a reconstruction (test font plus optional
# text-transform on a single <p>). Confirm against the original.
sub _writeTestFile {
  my ($fileName, $transform, $lines) = @_;
  my $rule = $transform eq '' ? '' : " text-transform: $transform;";
  open my $out, '>', $fileName or die "can't write $fileName: $!\n";
  print $out <<__END__;
<!DOCTYPE html>
<html>
 <head>
  <meta http-equiv="Content-type" content="text/html; charset=utf-8">
  <style type="text/css">
   \@font-face { font-family: foo; src: url($testFont); }
   p { font-family: foo;$rule }
  </style>
 </head>
 <body>
  <p>
__END__
  print $out "$_\n" for @$lines;
  print $out <<__END__;
  </p>
 </body>
</html>
__END__
  # Buffered write errors only surface at close, so check it.
  close $out or die "error writing $fileName: $!\n";
  return;
}

# Generate all-$whichMapping.html and all-$whichMapping-ref.html.
#   $whichMapping - 'lower' or 'upper'
#   $hash         - ref to hash: numeric code point => mapped hex string(s)
# The test page lists the source characters under the matching
# text-transform; the reference page lists the expected results untransformed.
sub genTest {
  my ($whichMapping, $hash) = @_;
  my @codes = sort { $a <=> $b } keys %$hash;

  _writeTestFile("all-$whichMapping.html", "${whichMapping}case",
    [ map { sprintf("&#x%04X;", $_) . _charComment($_) } @codes ]);

  _writeTestFile("all-$whichMapping-ref.html", '',
    [ map { _entities($hash->{$_}) . _charComment($_) } @codes ]);
  return;
}

# Generate all-title.html and all-title-ref.html. Each tested character is
# followed by a lowercase "x" so the page shows which letter was capitalized.
sub genTitleTest {
  my @codes = sort { $a <=> $b } keys %allTitle;

  _writeTestFile("all-title.html", "capitalize",
    [ map { sprintf("&#x%04X;x", $_) . _charComment($_) } @codes ]);

  my @refLines;
  foreach my $key (@codes) {
    my $expected;
    # capitalize is only applied to characters with GC=L* or N*...
    if (defined $gc{$key} and $gc{$key} =~ /^[LN]/) {
      # ...and those that are already uppercase are not transformed
      if (exists $allUpper{$key}) {
        $expected = _entities($allTitle{$key});
      } else {
        $expected = sprintf("&#x%04X;", $key);
      }
      $expected .= "x";
    } else {
      # The character itself is skipped, so the following "x" is capitalized.
      $expected = sprintf("&#x%04X;X", $key);
    }
    push @refLines, $expected . _charComment($key);
  }
  _writeTestFile("all-title-ref.html", '', \@refLines);
  return;
}