|
1 #!/usr/bin/env perl |
|
2 |
|
3 # This Source Code Form is subject to the terms of the Mozilla Public |
|
4 # License, v. 2.0. If a copy of the MPL was not distributed with this file, |
|
5 # You can obtain one at http://mozilla.org/MPL/2.0/. |
|
6 |
|
7 # This tool is used to extract "special" (one-to-many) case mappings |
|
8 # into a form that can be used by nsTextRunTransformations. |
|
9 |
|
use strict;
use warnings;
|
11 |
|
# Exactly two arguments are required: the paths of UnicodeData.txt and
# SpecialCasing.txt, in that order.
#
# The usage text is sent to STDERR and the exit status is non-zero. The
# generated C++ goes to standard output, so printing the help there (and
# exiting 0) would let a bad invocation whose output is redirected to
# nsSpecialCasingData.cpp look like a successful run.
if (@ARGV != 2) {
    print STDERR <<__EOT;
# Run this tool using a command line of the form
#
# perl genSpecialCasingData.pl UnicodeData.txt SpecialCasing.txt
#
# The nsSpecialCasingData.cpp file will be written to standard output.
#
# This tool will also write up-to-date versions of the test files
# all-{upper,lower,title}.html
# and corresponding -ref files in the current directory.
#
__EOT
    exit 1;
}
|
27 |
|
# Simple case mappings read from UnicodeData.txt. Keys are numeric code
# points; values are the mapping fields exactly as written in the file.
my %allLower;
my %allUpper;
my %allTitle;
# Canonical decomposition string (space-separated hex code points) =>
# the character that decomposes to it, as an uppercase hex string of at
# least four digits.
my %compositions;
# Field 2 (general category) of each code point, keyed by numeric code point.
my %gc;

# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the OS error is included in the message.
open my $unicodeData, '<', $ARGV[0]
    or die "can't open $ARGV[0] (should be UnicodeData.txt): $!\n";
while (my $line = <$unicodeData>) {
    chomp $line;
    # A limit of -1 keeps trailing empty fields, so the mapping fields are
    # empty strings rather than undef when a record ends in ";;".
    my @fields = split /;/, $line, -1;
    next if ($fields[1] =~ /</); # ignore ranges etc
    my $usv = hex $fields[0];
    # Fields 12, 13 and 14 are stored as the upper-, lower- and title-case
    # mappings respectively; empty fields are not recorded.
    $allUpper{$usv} = $fields[12] if $fields[12] ne '';
    $allLower{$usv} = $fields[13] if $fields[13] ne '';
    $allTitle{$usv} = $fields[14] if $fields[14] ne '';
    $gc{$usv} = $fields[2];
    # we only care about non-singleton canonical decomps: the field must
    # contain a space (more than one code point) and no "<...>" tag
    my $decomp = $fields[5];
    next unless $decomp =~ / / and $decomp !~ /</;
    $compositions{$decomp} = sprintf("%04X", $usv);
}
close $unicodeData;
|
49 |
|
# One-to-many case mappings read from SpecialCasing.txt, keyed by numeric
# code point; values are space-separated hex code points (see addIfSpecial).
my %specialLower;
my %specialUpper;
my %specialTitle;
# Trailing "# ..." comment of each four-field record, keyed by numeric code
# point.
my %charName;
# "SpecialCasing-..." and "Date:..." comment lines, copied into the
# generated C++ file.
my @headerLines;

open my $specialCasing, '<', $ARGV[1]
    or die "can't open $ARGV[1] (should be SpecialCasing.txt): $!\n";
while (my $line = <$specialCasing>) {
    chomp $line;
    # Take the capture from the match's own return value. Reading $1 after
    # an unchecked match can pick up a capture left by an earlier match when
    # this line has no "#" comment.
    my ($comment) = $line =~ /#\s*(.+)$/;
    $comment = '' unless defined $comment;
    if ($comment =~ /^(?:SpecialCasing-|Date:)/) {
        push @headerLines, $comment;
        next;
    }
    # Strip the comment and the terminating ";" before splitting.
    $line =~ s/#.*//;
    $line =~ s/;\s*$//;
    next if $line eq '';
    my @fields = split /; */, $line;
    # Only records with exactly four fields (code; lower; title; upper) are
    # used; anything else is skipped.
    next unless @fields == 4;
    my $usv = hex $fields[0];
    addIfSpecial(\%specialLower, $usv, $fields[1]);
    addIfSpecial(\%specialTitle, $usv, $fields[2]);
    addIfSpecial(\%specialUpper, $usv, $fields[3]);
    $charName{$usv} = $comment;
}
close $specialCasing;
|
76 |
|
# Write the generated C++ source to standard output: the license and
# includes, the version/date lines taken from SpecialCasing.txt, one table
# per case mapping, and finally the comparison function and the accessor
# definitions.
print <<__END__;
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Auto-generated from files in the Unicode Character Database
by genSpecialCasingData.pl - do not edit! */

#include "nsSpecialCasingData.h"
#include "mozilla/Util.h" // for ArrayLength
#include <stdlib.h> // for bsearch

__END__

# Each header line collected from SpecialCasing.txt becomes a C comment.
foreach my $headerLine (@headerLines) {
    print "/* $headerLine */\n";
}

print <<__END__;

using mozilla::unicode::MultiCharMapping;

__END__

# Emit the three tables in the order Lower, Upper, Title.
foreach my $table (['Lower', \%specialLower],
                   ['Upper', \%specialUpper],
                   ['Title', \%specialTitle]) {
    my ($suffix, $mappings) = @$table;
    printMappings($suffix, $mappings);
}

print <<__END__;
static int CompareMCM(const void* aKey, const void* aElement)
{
const uint32_t ch = *static_cast<const uint32_t*>(aKey);
const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
return int(ch) - int(mcm->mOriginalChar);
}

#define MAKE_SPECIAL_CASE_ACCESSOR(which) \\
const MultiCharMapping* \\
Special##which(uint32_t aChar) \\
{ \\
const void* p = bsearch(&aChar, CaseSpecials_##which, \\
mozilla::ArrayLength(CaseSpecials_##which), \\
sizeof(MultiCharMapping), CompareMCM); \\
return static_cast<const MultiCharMapping*>(p); \\
}

namespace mozilla {
namespace unicode {

MAKE_SPECIAL_CASE_ACCESSOR(Lower)
MAKE_SPECIAL_CASE_ACCESSOR(Upper)
MAKE_SPECIAL_CASE_ACCESSOR(Title)

} // namespace unicode
} // namespace mozilla
__END__
|
130 |
|
# Overlay the one-to-many mappings on the simple mapping tables, so each
# %all* hash holds the complete mapping used to build the test pages.
foreach my $pair ([\%allLower, \%specialLower],
                  [\%allUpper, \%specialUpper],
                  [\%allTitle, \%specialTitle]) {
    my ($all, $specials) = @$pair;
    addSpecialsTo($all, $specials);
}

# Font referenced by the @font-face rule of every generated test page.
my $testFont = "../fonts/dejavu-sans/DejaVuSans.ttf";

# Write the lower- and upper-case test pages, then the title-case pair.
my %testTables = (lower => \%allLower, upper => \%allUpper);
foreach my $which (qw(lower upper)) {
    genTest($which, $testTables{$which});
}
genTitleTest();
|
139 |
|
# Print one C++ table of multi-character case mappings to standard output.
#
#   $whichMapping - suffix of the array name (CaseSpecials_<suffix>)
#   $hash         - reference to a hash of numeric code point =>
#                   space-separated hex code points
#
# Entries are written in ascending code point order. Each entry has three
# slots for the mapping; shorter mappings are padded with zero. A mapping
# with more than three code points cannot be represented, so the tool dies
# instead of silently dropping the extra code points. The trailing comment
# on each entry comes from the file-level %charName table.
sub printMappings {
    my ($whichMapping, $hash) = @_;
    print "static const MultiCharMapping CaseSpecials_${whichMapping}[] = {\n";
    foreach my $key (sort { $a <=> $b } keys %$hash) {
        my @chars = map { hex } split(/ /, $hash->{$key});
        die sprintf("mapping for U+%04X has %d code points, but only 3 fit in a table entry\n",
                    $key, scalar @chars)
            if @chars > 3;
        # Pad explicitly rather than interpolating undefined elements.
        push @chars, 0 while @chars < 3;
        my $name = defined $charName{$key} ? $charName{$key} : '';
        printf " { 0x%04x, {0x%04x, 0x%04x, 0x%04x} }, // %s\n", $key,
               @chars, $name;
    }
    print "};\n\n";
}
|
151 |
|
# Record $mapping for code point $usv in %$hash, but only if it maps to
# more than one code point (i.e. the mapping string contains a space).
#
#   $hash    - reference to the hash to update
#   $usv     - numeric code point being mapped
#   $mapping - space-separated hex code points
#
# Before storing, a leading sequence that equals a known canonical
# decomposition (file-level %compositions) is replaced by the composed
# character. Only compositions that start with the initial character are
# applied.
sub addIfSpecial {
    my ($hash, $usv, $mapping) = @_;
    return unless $mapping =~ / /;
    # Sorted keys make the result independent of hash iteration order, which
    # matters if one replacement produces the start of another decomposition.
    foreach my $decomp (sort keys %compositions) {
        # \Q...\E matches the key literally; the lookahead requires the match
        # to end on a code point boundary, so a key cannot match part of a
        # longer hex number.
        $mapping =~ s/^\Q$decomp\E(?![0-9A-Fa-f])/$compositions{$decomp}/;
    }
    $hash->{$usv} = $mapping;
}
|
161 |
|
# Copy every entry of %$specials into %$hash, replacing any existing value
# stored under the same key. %$specials itself is left untouched.
#
#   $hash     - reference to the hash that receives the entries
#   $specials - reference to the hash whose entries are copied
sub addSpecialsTo {
    my ($hash, $specials) = @_;
    # keys and values list an unmodified hash in the same order, so a single
    # slice assignment copies each value to its own key.
    @{$hash}{ keys %$specials } = values %$specials;
}
|
168 |
|
# Write the test page all-<whichMapping>.html and its reference page
# all-<whichMapping>-ref.html in the current directory.
#
#   $whichMapping - 'lower' or 'upper' at the call sites in this file; used
#                   in the file names and in the text-transform value
#                   ("<whichMapping>case")
#   $hash         - reference to a hash of numeric code point =>
#                   space-separated hex code points of the expected result
#
# The test page lists every key as a character reference inside a paragraph
# styled with text-transform; the reference page lists the mapped code
# points with no transform. Both use the file-level $testFont and annotate
# lines with names from %charName where one exists.
#
# Dies if either file cannot be opened or closed. Previously a failed open
# went unnoticed and the page was silently not written.
sub genTest {
    my ($whichMapping, $hash) = @_;
    my @codePoints = sort { $a <=> $b } keys %$hash;

    my $testFile = "all-$whichMapping.html";
    open my $test, '>', $testFile or die "can't write $testFile: $!\n";
    print {$test} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { font-family: foo; text-transform: ${whichMapping}case; }
</style>
</head>
<body>
<p>
__END__
    foreach my $key (@codePoints) {
        printf {$test} "&#x%04X;", $key;
        if (exists $charName{$key}) {
            my $name = defined $charName{$key} ? $charName{$key} : '';
            print {$test} " <!-- $name -->";
        }
        print {$test} "\n";
    }
    print {$test} <<__END__;
</p>
</body>
</html>
__END__
    # Buffered write errors only show up at close, so check it.
    close $test or die "error writing $testFile: $!\n";

    my $refFile = "all-$whichMapping-ref.html";
    open my $ref, '>', $refFile or die "can't write $refFile: $!\n";
    print {$ref} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { font-family: foo; }
</style>
</head>
<body>
<p>
__END__
    foreach my $key (@codePoints) {
        # One character reference per code point of the expected mapping.
        print {$ref} join('', map { sprintf("&#x%s;", $_) } split(/ /, $hash->{$key}));
        if (exists $charName{$key}) {
            my $name = defined $charName{$key} ? $charName{$key} : '';
            print {$ref} " <!-- $name -->";
        }
        print {$ref} "\n";
    }
    print {$ref} <<__END__;
</p>
</body>
</html>
__END__
    close $ref or die "error writing $refFile: $!\n";
}
|
223 |
|
# Write the title-case test page all-title.html and its reference page
# all-title-ref.html in the current directory.
#
# Takes no arguments; reads the file-level %allTitle, %allUpper, %gc,
# %charName and $testFont.
#
# The test page lists every key of %allTitle followed by a lowercase "x",
# inside a paragraph styled with text-transform: capitalize. The reference
# page spells out the expected result for each line (see the comments in
# the second loop).
#
# Dies if either file cannot be opened or closed. Previously a failed open
# went unnoticed and the page was silently not written.
sub genTitleTest {
    my @codePoints = sort { $a <=> $b } keys %allTitle;

    my $testFile = "all-title.html";
    open my $test, '>', $testFile or die "can't write $testFile: $!\n";
    print {$test} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { font-family: foo; text-transform: capitalize; }
</style>
</head>
<body>
<p>
__END__
    foreach my $key (@codePoints) {
        printf {$test} "&#x%04X;x", $key;
        if (exists $charName{$key}) {
            my $name = defined $charName{$key} ? $charName{$key} : '';
            print {$test} " <!-- $name -->";
        }
        print {$test} "\n";
    }
    print {$test} <<__END__;
</p>
</body>
</html>
__END__
    # Buffered write errors only show up at close, so check it.
    close $test or die "error writing $testFile: $!\n";

    my $refFile = "all-title-ref.html";
    open my $ref, '>', $refFile or die "can't write $refFile: $!\n";
    print {$ref} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { font-family: foo; }
</style>
</head>
<body>
<p>
__END__
    foreach my $key (@codePoints) {
        # capitalize is only applied to characters with GC=L* or N*...
        if (defined $gc{$key} && $gc{$key} =~ /^[LN]/) {
            # ...and those that are already uppercase are not transformed
            # (only characters with an entry in %allUpper get the title
            # mapping; the rest are written unchanged)
            if (exists $allUpper{$key}) {
                print {$ref} join('', map { sprintf("&#x%s;", $_) } split(/ /, $allTitle{$key}));
            } else {
                printf {$ref} "&#x%04X;", $key;
            }
            print {$ref} "x";
        } else {
            # For any other category the character is written unchanged and
            # the following "x" is expected as "X".
            printf {$ref} "&#x%04X;X", $key;
        }
        if (exists $charName{$key}) {
            my $name = defined $charName{$key} ? $charName{$key} : '';
            print {$ref} " <!-- $name -->";
        }
        print {$ref} "\n";
    }
    print {$ref} <<__END__;
</p>
</body>
</html>
__END__
    close $ref or die "error writing $refFile: $!\n";
}