|
1 #!/usr/bin/perl |
|
2 # -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*- |
|
3 # This Source Code Form is subject to the terms of the Mozilla Public |
|
4 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
6 |
|
7 use XML::LibXSLT; |
|
8 use XML::LibXML; |
|
9 use LWP::Simple; |
|
10 |
|
# ------------------------------------------------------------------------------
# Configuration shared by every mode of the script.

# Files written by the script.
our $FILE_UNICODE        = "unicode.xml";
our $FILE_DICTIONARY     = "dictionary.xml";
our $FILE_DIFFERENCES    = "differences.txt";
our $FILE_NEW_DICTIONARY = "new_dictionary.txt";
our $FILE_SYNTAX_ERRORS  = "syntax_errors.txt";
our $FILE_JS             = "tests/stretchy-and-large-operators.js";

# Our own operator dictionary (a property file).
our $MOZ_DICTIONARY = "mathfont.properties";

# Location of the dictionary published by the W3C in
# "XML Entity Definitions for Characters".
our $WG_DICTIONARY_URL = "http://www.w3.org/2003/entities/2007xml/unicode.xml";

# XSL stylesheet that extracts the relevant data from the W3C dictionary.
our $DICTIONARY_XSL = "operatorDictionary.xsl";

# The W3C dictionary once transformed with operatorDictionary.xsl; by default
# this is the file produced by the "download" mode.
our $WG_DICTIONARY = $FILE_DICTIONARY;
|
30 |
|
# Validate the command line: the first argument must be a known mode and the
# number of arguments must not exceed what that mode accepts. Anything else
# prints the usage text (which terminates the script).
{
    # mode => highest value $#ARGV may take for that mode
    my %last_index_for = (
        "download" => 1,
        "compare"  => 1,
        "check"    => 0,
        "make-js"  => 0,
        "clean"    => 0,
    );

    my $accepted = 0;
    if ($#ARGV >= 0 && exists($last_index_for{$ARGV[0]})) {
        $accepted = ($#ARGV <= $last_index_for{$ARGV[0]});
    }

    if (!$accepted) {
        &usage;
    }
}
|
39 |
|
# Mode "download": fetch the W3C dictionary, transform it with the XSL
# stylesheet and store the result in $FILE_DICTIONARY, then quit.
if ($ARGV[0] eq "download") {
    # An optional second argument replaces the default download location.
    if ($#ARGV == 1) {
        $WG_DICTIONARY_URL = $ARGV[1];
    }

    print "Downloading $WG_DICTIONARY_URL...\n";
    # getstore() returns the HTTP status code. Stop here on failure, otherwise
    # a stale (or missing) $FILE_UNICODE would be converted below.
    my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
    if (!is_success($status)) {
        die("Couldn't download $WG_DICTIONARY_URL (HTTP status $status)!\n");
    }

    print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
    my $xslt = XML::LibXSLT->new();
    my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
    my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                          no_cdata => 1);
    my $stylesheet = $xslt->parse_stylesheet($style_doc);
    my $results = $stylesheet->transform($source);

    open(my $dictionary_fh, '>', $FILE_DICTIONARY) ||
        die("Couldn't open $FILE_DICTIONARY! $!");
    # output_as_bytes() yields already-encoded bytes: write them untouched.
    binmode($dictionary_fh);
    print {$dictionary_fh} $stylesheet->output_as_bytes($results);
    # Buffered write errors only surface when the handle is closed.
    close($dictionary_fh) ||
        die("Couldn't write $FILE_DICTIONARY! $!");
    exit 0;
}
|
59 |
|
# Mode "clean": remove the intermediate and report files, then quit.
# $FILE_JS is not part of the list and is therefore left in place.
if ($ARGV[0] eq "clean") {
    my @generated_files = (
        $FILE_UNICODE,
        $FILE_DICTIONARY,
        $FILE_DIFFERENCES,
        $FILE_NEW_DICTIONARY,
        $FILE_SYNTAX_ERRORS,
    );
    unlink(@generated_files);
    exit 0;
}
|
68 |
|
# Mode "compare" accepts an optional second argument naming the transformed
# W3C dictionary to read instead of the default one.
$WG_DICTIONARY = $ARGV[1] if (@ARGV == 2 && $ARGV[0] eq "compare");
|
72 |
|
73 ################################################################################ |
|
74 # structure of the dictionary used by this script: |
|
75 # - key: same as in mathfont.properties |
|
76 # - table: |
|
77 # index | value |
|
78 # 0 | description |
|
79 # 1 | lspace |
|
80 # 2 | rspace |
|
81 # 3 | minsize |
|
82 # 4 | largeop |
|
83 # 5 | movablelimits |
|
84 # 6 | stretchy |
|
85 # 7 | separator |
|
86 # 8 | accent |
|
87 # 9 | fence |
|
88 # 10 | symmetric |
|
89 # 11 | priority |
|
90 # 12 | linebreakstyle |
|
91 # 13 | direction |
|
92 # 14 | integral |
|
93 # 15 | mirrorable |
|
94 |
|
# 1) build %moz_hash from $MOZ_DICTIONARY
#
# Only lines starting with "operator." are considered. Each of them is
# expected to have the form
#   <key> = <properties> # <description>
# and is stored in %moz_hash as a 16-element array (see the table above).

print "loading $MOZ_DICTIONARY...\n";
open(my $moz_file, '<', $MOZ_DICTIONARY) ||
    die("Couldn't open $MOZ_DICTIONARY! $!");

print "building dictionary...\n";
while (my $line = <$moz_file>) {
    next unless ($line =~ m/^operator\.(.*)$/);

    # A line that starts with "operator." but does not follow the full entry
    # syntax must be skipped: using the capture groups after a failed match
    # would reuse stale values and store a garbage record.
    unless ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/) {
        warn("skipping malformed entry at $MOZ_DICTIONARY line $.\n");
        next;
    }

    # 1.1) build the key
    my ($key, $properties, $description) = ($1, $2, $3);

    # 1.2) build the array
    my @value = ();
    $value[0] = $description;
    # lspace, rspace and minsize are single digits; the defaults below apply
    # when the property is absent.
    if ($properties =~ m/^(.*)lspace:(\d)(.*)$/) { $value[1] = $2; }
    else { $value[1] = "5"; }
    if ($properties =~ m/^(.*)rspace:(\d)(.*)$/) { $value[2] = $2; }
    else { $value[2] = "5"; }
    if ($properties =~ m/^(.*)minsize:(\d)(.*)$/) { $value[3] = $2; }
    else { $value[3] = "1"; }
    # Boolean properties: true when the keyword appears among the properties.
    $value[4] = ($properties =~ m/^(.*)largeop(.*)$/);
    $value[5] = ($properties =~ m/^(.*)movablelimits(.*)$/);
    $value[6] = ($properties =~ m/^(.*)stretchy(.*)$/);
    $value[7] = ($properties =~ m/^(.*)separator(.*)$/);
    $value[8] = ($properties =~ m/^(.*)accent(.*)$/);
    $value[9] = ($properties =~ m/^(.*)fence(.*)$/);
    $value[10] = ($properties =~ m/^(.*)symmetric(.*)$/);
    $value[11] = ""; # we don't store "priority" in our dictionary
    $value[12] = ""; # we don't store "linebreakstyle" in our dictionary
    if ($properties =~ m/^(.*)direction:([a-z]*)(.*)$/) { $value[13] = $2; }
    else { $value[13] = ""; }
    $value[14] = ($properties =~ m/^(.*)integral(.*)$/);
    $value[15] = ($properties =~ m/^(.*)mirrorable(.*)$/);

    # 1.3) save the key and value
    $moz_hash{$key} = [ @value ];
}

close($moz_file);
|
134 |
|
################################################################################
# 2) If mode "make-js", generate tests/stretchy-and-large-operators.js and quit.
#    If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

if ($ARGV[0] eq "make-js") {
    print "generating file $FILE_JS...\n";
    open(my $file_js, '>', $FILE_JS) ||
        die("Couldn't open $FILE_JS! $!");
    print {$file_js} "// This file is automatically generated. Do not edit.\n";
    print {$file_js} "var stretchy_and_large_operators = [";

    # Iterate in sorted order: the order returned by keys() is not stable, so
    # an unsorted walk would make the generated file differ between runs.
    foreach my $key (sort(keys(%moz_hash))) {
        my @moz = @{ $moz_hash{$key} };

        # Skip keys that do not have the operator.<chars>.<form> shape rather
        # than reusing the captures of an earlier match.
        next unless ($key =~ m/^operator\.([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
        my ($chars, $form) = ($1, $2);
        # The extra backslash keeps the escape sequence readable in the label.
        my $opname = "\\$chars.$form: ";

        # largeop entry
        if ($moz[4]) {
            print {$file_js} "['$opname', '$chars','l','$form'],";
        }

        # stretchy entry, tagged with the first letter of its direction
        if ($moz[6]) {
            my $direction = substr($moz[13], 0, 1);
            print {$file_js} "['$opname', '$chars','$direction','$form'],";
        }
    }
    print {$file_js} "];\n";
    close($file_js) ||
        die("Couldn't write $FILE_JS! $!");
    exit 0;
}
|
167 |
|
# Mode "check": verify the private data of our dictionary, write the problems
# found to $FILE_SYNTAX_ERRORS, print a summary and quit.
if ($ARGV[0] eq "check") {
    print "checking operator dictionary...\n";
    open(my $file_syntax_errors, '>', $FILE_SYNTAX_ERRORS) ||
        die("Couldn't open $FILE_SYNTAX_ERRORS! $!");

    my $nb_errors = 0;
    my $nb_warnings = 0;

    # check the validity of our private data
    # (sorted so that the report is identical from one run to the next)
    foreach my $key (sort(keys(%moz_hash))) {
        my @moz = @{ $moz_hash{$key} };
        my $valid = 1;

        if (!($moz[13] eq "" ||
              $moz[13] eq "horizontal" ||
              $moz[13] eq "vertical")) {
            $valid = 0;
            $nb_errors++;
            print {$file_syntax_errors} "error: invalid direction \"$moz[13]\"\n";
        }

        if (!$moz[4] && $moz[14]) {
            $valid = 0;
            $nb_warnings++;
            print {$file_syntax_errors} "warning: operator is integral but not largeop\n";
        }

        if (($moz[0] =~ m/^(.*)[iI]ntegral(.*)$/) && !$moz[14]) {
            $valid = 0;
            $nb_warnings++;
            print {$file_syntax_errors} "warning: operator contains the term \"integral\" in its comment, but is not integral\n";
        }

        if (!$valid) {
            # show the offending entry below the messages about it
            print {$file_syntax_errors} generateEntry($key, @moz);
            print {$file_syntax_errors} "\n";
        }
    }

    # check that all forms have the same direction.
    foreach my $key (sort(keys(%moz_hash))) {
        # Forms are removed from the hash once their operator has been
        # examined; skip the ones already handled through a sibling form.
        next unless (exists($moz_hash{$key}));
        next unless ($key =~ m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
        my $base = $1;

        # Collect the existing forms (prefix, infix, postfix order) together
        # with the set of distinct directions they use.
        my @forms = ();
        my %directions = ();
        foreach my $form ("prefix", "infix", "postfix") {
            my $form_key = "$base.$form";
            next unless (exists($moz_hash{$form_key}));
            push(@forms, $form_key);
            $directions{ $moz_hash{$form_key}->[13] } = 1;
        }

        # More than one distinct direction means at least two forms disagree.
        if (scalar(keys(%directions)) > 1) {
            $nb_errors++;
            print {$file_syntax_errors}
                "error: operator has a stretchy form, but all forms";
            print {$file_syntax_errors}
                " have not the same direction\n";
            foreach my $form_key (@forms) {
                print {$file_syntax_errors}
                    generateEntry($form_key, @{ $moz_hash{$form_key} });
            }
            print {$file_syntax_errors} "\n";
        }

        # Remove every form of this operator (the prefix form included) so
        # that the operator is examined exactly once.
        foreach my $form_key (@forms) {
            delete $moz_hash{$form_key};
        }
    }

    close($file_syntax_errors);
    print "\n";
    if ($nb_errors > 0 || $nb_warnings > 0) {
        print "$nb_errors error(s) found\n";
        print "$nb_warnings warning(s) found\n";
        print "See output file $FILE_SYNTAX_ERRORS.\n\n";
    } else {
        print "No error found.\n\n";
    }

    exit 0;
}
|
289 |
|
################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY
#
# @wg_keys records the keys in reverse document order, so that popping from it
# walks the entries in document order.

print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
@wg_keys = ();

foreach my $entry ($doc->findnodes('/root/entry')) {
    # 3.1) build the key
    my $key = "operator.";

    my $codepoints = $entry->getAttribute("unicode");
    $codepoints = "" unless (defined($codepoints));

    # The attribute is a "-"-separated list of code points; a trailing "-" is
    # appended so that every item is followed by a separator.
    my $remaining = "$codepoints-";
    my $nb_codepoints = 0;
    while ($remaining =~ m/^U?0(\w*)-(.*)$/) {
        # Concatenate .\uNNNN
        $key = "$key\\u$1";
        $remaining = $2;
        $nb_codepoints++;
    }

    # The pattern above only accepts items starting with a zero digit. When
    # an item cannot be parsed the key would be incomplete (for instance
    # "operator..infix") and collide with other entries, so skip the entry.
    if ($nb_codepoints == 0 || $remaining ne "") {
        warn("skipping entry with unsupported unicode attribute " .
             "\"$codepoints\"\n");
        next;
    }

    my $form = $entry->getAttribute("form"); # "Form"
    $key = "$key.$form";

    # 3.2) build the array
    my @value = ();
    $value[0] = lc($entry->getAttribute("description"));
    $value[1] = $entry->getAttribute("lspace");
    if ($value[1] eq "") { $value[1] = "5"; }
    $value[2] = $entry->getAttribute("rspace");
    if ($value[2] eq "") { $value[2] = "5"; }
    $value[3] = $entry->getAttribute("minsize");
    if ($value[3] eq "") { $value[3] = "1"; }

    my $properties = $entry->getAttribute("properties");
    $properties = "" unless (defined($properties));
    $value[4] = ($properties =~ m/^(.*)largeop(.*)$/);
    $value[5] = ($properties =~ m/^(.*)movablelimits(.*)$/);
    $value[6] = ($properties =~ m/^(.*)stretchy(.*)$/);
    $value[7] = ($properties =~ m/^(.*)separator(.*)$/);
    $value[8] = ($properties =~ m/^(.*)accent(.*)$/);
    $value[9] = ($properties =~ m/^(.*)fence(.*)$/);
    $value[10] = ($properties =~ m/^(.*)symmetric(.*)$/);
    $value[15] = ($properties =~ m/^(.*)mirrorable(.*)$/);
    $value[11] = $entry->getAttribute("priority");
    $value[12] = $entry->getAttribute("linebreakstyle");

    # not stored in the WG dictionary
    $value[13] = ""; # direction
    $value[14] = ""; # integral

    # 3.3) save the key and value
    # Queue each key only once: a key queued twice would be looked up again
    # after its record has already been consumed.
    push(@wg_keys, $key) unless (exists($wg_hash{$key}));
    $wg_hash{$key} = [ @value ];
}
@wg_keys = reverse(@wg_keys);
|
346 |
|
################################################################################
# 4) Compare the two dictionaries and output the result
#
# $FILE_DIFFERENCES receives a report of the conflicting, new and obsolete
# entries; $FILE_NEW_DICTIONARY receives a proposed updated dictionary.

print "comparing dictionaries...\n";
open(my $file_differences, '>', $FILE_DIFFERENCES) ||
    die("Couldn't open $FILE_DIFFERENCES! $!");
open(my $file_new_dictionary, '>', $FILE_NEW_DICTIONARY) ||
    die("Couldn't open $FILE_NEW_DICTIONARY! $!");

my ($conflicting, $conflicting_stretching) = (0, 0);
my ($new, $new_stretching) = (0, 0);
my ($obsolete, $obsolete_stretching) = (0, 0);
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
# (@wg_keys is stored reversed, so walk it backwards)
foreach my $key (reverse(@wg_keys)) {

    my @wg = @{ $wg_hash{$key} };
    delete $wg_hash{$key};
    my $wg_value = generateCommon(@wg);

    if (exists($moz_hash{$key})) {
        # entry is in both dictionary
        my @moz = @{ $moz_hash{$key} };
        # entries left in %moz_hash after this loop are the obsolete ones
        delete $moz_hash{$key};
        my $moz_value = generateCommon(@moz);
        if ($moz_value ne $wg_value) {
            # conflicting entry
            print {$file_differences} "[conflict]";
            $conflicting++;
            if ($moz[6] xor $wg[6]) {
                print {$file_differences} "[stretching]";
                $conflicting_stretching++;
            }
            print {$file_differences} " - $key ($wg[0])\n";
            print {$file_differences} "-$moz_value\n+$wg_value\n\n";
        } else {
            # unchanged entry
            $unchanged++;
        }
        # in both cases the new dictionary takes the WG data, completed
        # with our private data
        print {$file_new_dictionary}
            completeCommon($wg_value, $key, @moz, @wg);
    } else {
        # we don't have this entry in our dictionary yet
        print {$file_differences} "[new entry]";
        $new++;
        if ($wg[6]) {
            print {$file_differences} "[stretching]";
            $new_stretching++;
        }
        print {$file_differences} " - $key ($wg[0])\n";
        print {$file_differences} "-\n+$wg_value\n\n";
        print {$file_new_dictionary}
            completeCommon($wg_value, $key, (), @wg);
    }
}

print {$file_new_dictionary}
    "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries
foreach my $key (sort(keys(%moz_hash))) {
    my @moz = @{ $moz_hash{$key} };
    my $moz_value = generateCommon(@moz);
    print {$file_differences} "[obsolete entry]";
    $obsolete++;
    if ($moz[6]) {
        print {$file_differences} "[stretching]";
        $obsolete_stretching++;
    }
    print {$file_differences} " - $key ($moz[0])\n";
    print {$file_differences} "-$moz_value\n+\n\n";
    print {$file_new_dictionary}
        completeCommon($moz_value, $key, (), @moz);
}

close($file_differences);
close($file_new_dictionary);

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
# The command is spelled as in the usage text, separated from "run" by a space.
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;
|
442 |
|
443 ################################################################################ |
|
sub usage {
    # Print the synopsis of every supported mode, then terminate the script.
    my $script = "./updateOperatorDictionary.pl";
    my @synopsis = (
        "download [unicode.xml]",
        "compare [dictionary.xml]",
        "check",
        "make-js",
        "clean",
    );

    print "usage:\n";
    foreach my $arguments (@synopsis) {
        print " $script $arguments\n";
    }
    exit 0;
}
|
454 |
|
sub generateCommon {
    # Helper function to generate the string of data shared by both
    # dictionaries.
    #
    # Takes a record as a flat list (indices as in the table at the top of the
    # script) and returns "lspace:N rspace:N", followed by "minsize:N" when it
    # differs from "1" and by the keyword of every boolean property set.
    my (@v) = @_;

    # Lexical on purpose: the result must not leak into a package variable
    # that callers may be using.
    my $entry = "lspace:$v[1] rspace:$v[2]";
    if ($v[3] ne "1") { $entry = "$entry minsize:$v[3]"; }

    # [ index in the record, keyword ], in output order
    my @flags = (
        [ 4,  "largeop" ],
        [ 5,  "movablelimits" ],
        [ 6,  "stretchy" ],
        [ 7,  "separator" ],
        [ 8,  "accent" ],
        [ 9,  "fence" ],
        [ 10, "symmetric" ],
        [ 15, "mirrorable" ],
    );
    foreach my $flag (@flags) {
        my ($index, $keyword) = @{ $flag };
        if ($v[$index]) { $entry = "$entry $keyword"; }
    }

    return $entry;
}
|
470 |
|
sub completeCommon {
    # Helper to add key and private data to generateCommon.
    #
    # Arguments: the string built by generateCommon, the key, then one or two
    # records passed as flat lists. Perl flattens the lists, so two array
    # parameters cannot be told apart by the parameter list alone; they are
    # separated here using the fixed record size.
    #  - two records: the first is our (Mozilla) record, the second the WG one;
    #  - one record: it is used both for the private data and the description.
    # Returns the complete dictionary line, terminated by a newline.
    my ($entry, $key, @fields) = @_;

    my $record_size = 16; # number of indices in the record table
    my @v_moz = @fields;
    my @v_wg = @fields;
    if (scalar(@fields) > $record_size) {
        @v_moz = @fields[0 .. $record_size - 1];
        @v_wg = @fields[$record_size .. $#fields];
    }

    # generateCommon may already have emitted "mirrorable"; remember it so
    # the keyword is not written twice.
    my $has_mirrorable = ($entry =~ m/(?:^|\s)mirrorable(?:\s|$)/);

    $entry = "$key = $entry";

    if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }
    if ($v_moz[14]) { $entry = "$entry integral"; }
    if ($v_moz[15] && !$has_mirrorable) { $entry = "$entry mirrorable"; }

    if ($v_moz[0]) {
        # keep our previous comment
        $entry = "$entry # $v_moz[0]";
    } else {
        # otherwise use the description given by the WG
        $entry = "$entry # $v_wg[0]";
    }

    $entry = "$entry\n";
    return $entry;
}
|
492 |
|
sub generateEntry {
    # Helper function to generate an entry of our operator dictionary.
    #
    # Takes the key followed by our record (flat list) and returns the
    # dictionary line built by generateCommon and completeCommon. The record
    # is passed twice to completeCommon, so the same record provides both the
    # private data and the description.
    my ($key, @moz) = @_;

    # Lexical on purpose: callers keep their own $entry untouched.
    my $common = generateCommon(@moz);
    return completeCommon($common, $key, @moz, @moz);
}