Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | #!/usr/bin/env perl |
michael@0 | 2 | |
michael@0 | 3 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, |
michael@0 | 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 6 | |
michael@0 | 7 | # This tool is used to extract "special" (one-to-many) case mappings |
michael@0 | 8 | # into a form that can be used by nsTextRunTransformations. |
michael@0 | 9 | |
michael@0 | 10 | use strict; |
michael@0 | 11 | |
# Exactly two arguments are required: UnicodeData.txt and SpecialCasing.txt.
# Standard output carries the generated nsSpecialCasingData.cpp, so the usage
# text goes to STDERR and the script exits non-zero; a redirected build step
# therefore cannot mistake the help text for successfully generated source.
if (@ARGV != 2) {
  print STDERR <<__EOT;
# Run this tool using a command line of the form
#
# perl genSpecialCasingData.pl UnicodeData.txt SpecialCasing.txt
#
# The nsSpecialCasingData.cpp file will be written to standard output.
#
# This tool will also write up-to-date versions of the test files
# all-{upper,lower,title}.html
# and corresponding -ref files in the current directory.
#
__EOT
  exit 1;
}
michael@0 | 27 | |
# Tables built from UnicodeData.txt, all keyed by code point (as a number)
# unless noted otherwise:
#   %allLower / %allUpper / %allTitle - field 13 / 12 / 14 of each record
#                                       (hex string), stored only when non-empty
#   %gc                               - field 2 of each record
#   %compositions                     - keyed by a multi-code-point canonical
#                                       decomposition string (field 5), giving
#                                       the decomposed character as %04X hex
my %allLower;
my %allUpper;
my %allTitle;
my %compositions;
my %gc;
{
  # Three-argument open with a lexical handle: the file name is never
  # interpreted as part of the open mode, and the handle cannot leak.
  open(my $unicodeData, '<', $ARGV[0])
    or die "can't open $ARGV[0] (should be UnicodeData.txt): $!\n";
  while (my $line = <$unicodeData>) {
    chomp $line;
    next if $line =~ /^\s*$/;              # nothing to record on a blank line
    # Limit -1 keeps trailing empty fields, so every column is defined.
    my @fields = split /;/, $line, -1;
    next if ($fields[1] =~ /</);           # ignore ranges etc
    my $usv = hex $fields[0];
    $allUpper{$usv} = $fields[12] if $fields[12] ne '';
    $allLower{$usv} = $fields[13] if $fields[13] ne '';
    $allTitle{$usv} = $fields[14] if $fields[14] ne '';
    $gc{$usv} = $fields[2];
    # we only care about non-singleton canonical decomps: skip empty ones,
    # tagged ones (which contain "<") and single-code-point ones
    my $decomp = $fields[5];
    if ($decomp ne '' and $decomp !~ /</ and $decomp =~ / /) {
      $compositions{$decomp} = sprintf("%04X", $usv);
    }
  }
  close $unicodeData;
}
michael@0 | 49 | |
# Tables built from SpecialCasing.txt:
#   %specialLower / %specialTitle / %specialUpper
#                 - code point => mapping string, filled in by addIfSpecial()
#   %charName     - code point => the trailing "# ..." comment of its record
#   @headerLines  - the "SpecialCasing-..." and "Date:..." comment lines,
#                   echoed later into the generated C++ file
my %specialLower;
my %specialUpper;
my %specialTitle;
my %charName;
my @headerLines;
{
  open(my $specialCasing, '<', $ARGV[1])
    or die "can't open $ARGV[1] (should be SpecialCasing.txt): $!\n";
  while (my $line = <$specialCasing>) {
    chomp $line;
    # Capture in list context so a line without a comment yields undef
    # rather than whatever an earlier match happened to leave in $1.
    my ($comment) = $line =~ /#\s*(.+)$/;
    $comment = '' unless defined $comment;
    if ($comment =~ /^(?:SpecialCasing-|Date:)/) {
      push @headerLines, $comment;
      next;
    }
    $line =~ s/#.*//;        # drop the comment
    $line =~ s/;\s*$//;      # drop the terminating semicolon
    next if $line eq '';
    my @fields = split /; */, $line;
    # Only records with exactly four fields are used; records carrying an
    # extra (fifth) field are skipped.
    next unless @fields == 4;
    my $usv = hex $fields[0];
    addIfSpecial(\%specialLower, $usv, $fields[1]);
    addIfSpecial(\%specialTitle, $usv, $fields[2]);
    addIfSpecial(\%specialUpper, $usv, $fields[3]);
    $charName{$usv} = $comment;
  }
  close $specialCasing;
}
michael@0 | 76 | |
# ---- Write nsSpecialCasingData.cpp to standard output ----

# Fixed preamble: license, provenance note and includes.
print <<__END__;
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Auto-generated from files in the Unicode Character Database
by genSpecialCasingData.pl - do not edit! */

#include "nsSpecialCasingData.h"
#include "mozilla/Util.h" // for ArrayLength
#include <stdlib.h> // for bsearch

__END__

# Echo the version/date header lines collected from SpecialCasing.txt.
print "/* $_ */\n" foreach @headerLines;

print <<__END__;

using mozilla::unicode::MultiCharMapping;

__END__

# One table per mapping kind, in the order Lower, Upper, Title.
foreach my $table (['Lower', \%specialLower],
                   ['Upper', \%specialUpper],
                   ['Title', \%specialTitle]) {
  my ($label, $mappings) = @$table;
  printMappings($label, $mappings);
}

# bsearch comparator plus the three accessor functions that use it.
print <<__END__;
static int CompareMCM(const void* aKey, const void* aElement)
{
const uint32_t ch = *static_cast<const uint32_t*>(aKey);
const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
return int(ch) - int(mcm->mOriginalChar);
}

#define MAKE_SPECIAL_CASE_ACCESSOR(which) \\
const MultiCharMapping* \\
Special##which(uint32_t aChar) \\
{ \\
const void* p = bsearch(&aChar, CaseSpecials_##which, \\
mozilla::ArrayLength(CaseSpecials_##which), \\
sizeof(MultiCharMapping), CompareMCM); \\
return static_cast<const MultiCharMapping*>(p); \\
}

namespace mozilla {
namespace unicode {

MAKE_SPECIAL_CASE_ACCESSOR(Lower)
MAKE_SPECIAL_CASE_ACCESSOR(Upper)
MAKE_SPECIAL_CASE_ACCESSOR(Title)

} // namespace unicode
} // namespace mozilla
__END__
michael@0 | 130 | |
# Overlay the one-to-many specials on the simple mapping tables so that the
# generated test files cover both kinds of mapping.
foreach my $pair ([\%allLower, \%specialLower],
                  [\%allUpper, \%specialUpper],
                  [\%allTitle, \%specialTitle]) {
  my ($simple, $specials) = @$pair;
  addSpecialsTo($simple, $specials);
}

# Font referenced by the @font-face rule of every generated test page.
my $testFont = "../fonts/dejavu-sans/DejaVuSans.ttf";
foreach my $case (['lower', \%allLower], ['upper', \%allUpper]) {
  my ($label, $mappings) = @$case;
  genTest($label, $mappings);
}
genTitleTest();
michael@0 | 139 | |
# Print one C++ table, "CaseSpecials_<whichMapping>", to standard output.
#
#   $whichMapping - suffix for the array name ('Lower', 'Upper' or 'Title')
#   $hash         - hashref: code point number => space-separated hex mapping
#
# Entries are emitted in ascending code point order. Each row always has three
# mapped-character slots; slots the mapping does not fill are written as 0.
sub printMappings {
  my ($whichMapping, $hash) = @_;
  print "static const MultiCharMapping CaseSpecials_${whichMapping}[] = {\n";
  for my $usv (sort { $a <=> $b } keys %$hash) {
    my @hexChars = split(/ /, $hash->{$usv});
    # The "0x0" prefix makes a missing slot parse as plain zero.
    my @slots = map { hex "0x0$_" } @hexChars[0 .. 2];
    printf " { 0x%04x, {0x%04x, 0x%04x, 0x%04x} }, // %s\n",
           $usv, @slots, $charName{$usv};
  }
  print "};\n\n";
}
michael@0 | 151 | |
# Store $mapping as the special mapping of $usv in %$hash, but only if it is a
# one-to-many mapping (i.e. it contains more than one code point).
#
#   $hash    - hashref to update: code point number => mapping string
#   $usv     - the source code point, as a number
#   $mapping - space-separated hex code points
#
# Before storing, the leading pair of code points is replaced by its
# precomposed character whenever %compositions lists that pair, and this is
# repeated for as long as the new leading pair also has a composition.
# Looking the pair up directly (rather than trying every known composition as
# a regex in hash order) makes the result independent of hash iteration order
# and ensures only whole code points can match.
sub addIfSpecial {
  my ($hash, $usv, $mapping) = @_;
  return unless $mapping =~ / /;
  # only do compositions that start with the initial char
  while ($mapping =~ /^(\S+ \S+)(?= |$)/ and exists $compositions{$1}) {
    my $pair = $1;
    substr($mapping, 0, length($pair), $compositions{$pair});
  }
  $hash->{$usv} = $mapping;
}
michael@0 | 161 | |
# Copy every entry of %$specials into %$hash, overwriting any entry that is
# already present under the same key.
#
#   $hash     - hashref that receives the entries
#   $specials - hashref whose entries are copied
sub addSpecialsTo {
  my ($hash, $specials) = @_;
  # keys and values walk an unmodified hash in the same order, so a single
  # slice assignment pairs them up correctly.
  @{$hash}{ keys %$specials } = values %$specials;
}
michael@0 | 168 | |
# Write the test page all-<whichMapping>.html and its reference page
# all-<whichMapping>-ref.html into the current directory.
#
#   $whichMapping - 'lower' or 'upper'; used in the file names and in the
#                   text-transform value ("<whichMapping>case")
#   $hash         - hashref: code point number => space-separated hex mapping
#
# The test page lists each source character and applies text-transform; the
# reference page lists the mapped characters with no transform. Both pages
# list the entries in ascending code point order.
#
# Dies if either file cannot be opened or closed, so a failed write is never
# mistaken for freshly generated tests.
sub genTest {
  my ($whichMapping, $hash) = @_;
  my @codepoints = sort { $a <=> $b } keys %$hash;

  # Each page differs only in its file name, its "p" style rule and how one
  # entry is rendered.
  my @pages = (
    {
      file   => "all-$whichMapping.html",
      rule   => "p { font-family: foo; text-transform: ${whichMapping}case; }",
      render => sub { sprintf("&#x%04X;", $_[0]) },
    },
    {
      file   => "all-$whichMapping-ref.html",
      rule   => "p { font-family: foo; }",
      render => sub {
        join('', map { sprintf("&#x%s;", $_) } split(/ /, $hash->{$_[0]}));
      },
    },
  );

  foreach my $page (@pages) {
    open(my $out, '>', $page->{file})
      or die "can't write $page->{file}: $!\n";
    print {$out} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
$page->{rule}
</style>
</head>
<body>
<p>
__END__
    foreach my $key (@codepoints) {
      print {$out} $page->{render}->($key);
      print {$out} " <!-- $charName{$key} -->" if exists $charName{$key};
      print {$out} "\n";
    }
    print {$out} <<__END__;
</p>
</body>
</html>
__END__
    close($out) or die "can't close $page->{file}: $!\n";
  }
}
michael@0 | 223 | |
# Write all-title.html and all-title-ref.html into the current directory,
# covering every key of %allTitle in ascending code point order.
#
# The test page emits each character followed by "x" under
# text-transform: capitalize. The reference page spells out the expected
# rendering without any transform.
#
# Dies if either file cannot be opened or closed, so a failed write is never
# mistaken for freshly generated tests.
sub genTitleTest {
  my @codepoints = sort { $a <=> $b } keys %allTitle;

  my $testFile = "all-title.html";
  open(my $test, '>', $testFile) or die "can't write $testFile: $!\n";
  print {$test} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { font-family: foo; text-transform: capitalize; }
</style>
</head>
<body>
<p>
__END__
  foreach my $key (@codepoints) {
    printf {$test} "&#x%04X;x", $key;
    print {$test} " <!-- $charName{$key} -->" if exists $charName{$key};
    print {$test} "\n";
  }
  print {$test} <<__END__;
</p>
</body>
</html>
__END__
  close($test) or die "can't close $testFile: $!\n";

  my $refFile = "all-title-ref.html";
  open(my $ref, '>', $refFile) or die "can't write $refFile: $!\n";
  print {$ref} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { font-family: foo; }
</style>
</head>
<body>
<p>
__END__
  foreach my $key (@codepoints) {
    # capitalize is only applied to characters with GC=L* or N*...
    if ($gc{$key} =~ /^[LN]/) {
      # ...and those that are already uppercase are not transformed
      if (exists $allUpper{$key}) {
        print {$ref} join('', map { sprintf("&#x%s;", $_) } split(/ /, $allTitle{$key}));
      } else {
        printf {$ref} "&#x%04X;", $key;
      }
      print {$ref} "x";
    } else {
      # Any other character is emitted unchanged, followed by an uppercase
      # "X" where the letter/number branch above writes a lowercase "x".
      printf {$ref} "&#x%04X;X", $key;
    }
    print {$ref} " <!-- $charName{$key} -->" if exists $charName{$key};
    print {$ref} "\n";
  }
  print {$ref} <<__END__;
</p>
</body>
</html>
__END__
  close($ref) or die "can't close $refFile: $!\n";
}