#!/usr/bin/perl
# -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Maintenance tool for the MathML operator dictionary (mathfont.properties).
#
# Modes (first command-line argument):
#   download [url]     fetch the W3C unicode.xml and transform it with
#                      operatorDictionary.xsl into dictionary.xml
#   compare [file]     compare mathfont.properties with the transformed W3C
#                      dictionary; writes differences.txt and
#                      new_dictionary.txt
#   check              sanity-check mathfont.properties; writes
#                      syntax_errors.txt
#   make-js            generate tests/stretchy-and-large-operators.js
#   clean              remove the generated files (except the JS test file)

use strict;
use warnings;

use XML::LibXSLT;
use XML::LibXML;
use LWP::Simple;

# output files
my $FILE_UNICODE = "unicode.xml";
my $FILE_DICTIONARY = "dictionary.xml";
my $FILE_DIFFERENCES = "differences.txt";
my $FILE_NEW_DICTIONARY = "new_dictionary.txt";
my $FILE_SYNTAX_ERRORS = "syntax_errors.txt";
my $FILE_JS = "tests/stretchy-and-large-operators.js";

# our dictionary (property file)
my $MOZ_DICTIONARY = "mathfont.properties";

# dictionary provided by the W3C in "XML Entity Definitions for Characters"
my $WG_DICTIONARY_URL = "http://www.w3.org/2003/entities/2007xml/unicode.xml";

# XSL stylesheet to extract relevant data from the dictionary
my $DICTIONARY_XSL = "operatorDictionary.xsl";

# dictionary provided by the W3C transformed with operatorDictionary.xsl
my $WG_DICTIONARY = $FILE_DICTIONARY;

# Highest allowed index in @ARGV for each mode (0 means "mode only",
# 1 means "mode plus one optional argument").
my %max_arg_index = (
    "download" => 1,
    "compare"  => 1,
    "check"    => 0,
    "make-js"  => 0,
    "clean"    => 0,
);

my $mode = @ARGV ? $ARGV[0] : "";
if (!exists($max_arg_index{$mode}) || $#ARGV > $max_arg_index{$mode}) {
    usage();
}

if ($mode eq "download") {
    if ($#ARGV == 1) {
        $WG_DICTIONARY_URL = $ARGV[1];
    }
    print "Downloading $WG_DICTIONARY_URL...\n";
    my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
    if (!is_success($status)) {
        # Only warn: a previously downloaded $FILE_UNICODE may still be
        # present, in which case the conversion below can proceed with it.
        # If it is missing, load_xml below fails with its own error.
        warn "warning: couldn't download $WG_DICTIONARY_URL " .
             "(status $status); using any existing $FILE_UNICODE\n";
    }

    print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
    my $xslt = XML::LibXSLT->new();
    my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
    my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                          no_cdata => 1);
    my $stylesheet = $xslt->parse_stylesheet($style_doc);
    my $results = $stylesheet->transform($source);
    open(my $file, ">", $FILE_DICTIONARY) ||
        die ("Couldn't open $FILE_DICTIONARY!");
    print $file $stylesheet->output_as_bytes($results);
    close($file) || die ("Couldn't write $FILE_DICTIONARY!");
    exit 0;
}

if ($mode eq "clean") {
    unlink($FILE_UNICODE,
           $FILE_DICTIONARY,
           $FILE_DIFFERENCES,
           $FILE_NEW_DICTIONARY,
           $FILE_SYNTAX_ERRORS);
    exit 0;
}

if ($mode eq "compare" && $#ARGV == 1) {
    $WG_DICTIONARY = $ARGV[1];
}

################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table:
#    index | value
#      0   | description
#      1   | lspace
#      2   | rspace
#      3   | minsize
#      4   | largeop
#      5   | movablelimits
#      6   | stretchy
#      7   | separator
#      8   | accent
#      9   | fence
#     10   | symmetric
#     11   | priority
#     12   | linebreakstyle
#     13   | direction
#     14   | integral
#     15   | mirrorable
#
# Boolean properties are stored as 1 (present) or 0 (absent).

# 1) build %moz_hash from $MOZ_DICTIONARY

my %moz_hash;

print "loading $MOZ_DICTIONARY...\n";
open(my $moz_file, "<", $MOZ_DICTIONARY) ||
    die ("Couldn't open $MOZ_DICTIONARY!");

print "building dictionary...\n";
while (my $line = <$moz_file>) {
    next unless ($line =~ m/^operator\./);

    # Expected shape: "<key> = <properties> # <description>"
    unless ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/) {
        # Do not build an entry from stale capture groups.
        warn "warning: skipping malformed entry at line $. " .
             "of $MOZ_DICTIONARY\n";
        next;
    }

    # 1.1) build the key
    my ($key, $properties, $description) = ($1, $2, $3);

    # 1.2) build the array
    my @value = ();
    $value[0] = $description;
    $value[1] = ($properties =~ m/^.*lspace:(\d)/) ? $1 : "5";
    $value[2] = ($properties =~ m/^.*rspace:(\d)/) ? $1 : "5";
    $value[3] = ($properties =~ m/^.*minsize:(\d)/) ? $1 : "1";
    $value[4] = hasProperty($properties, "largeop");
    $value[5] = hasProperty($properties, "movablelimits");
    $value[6] = hasProperty($properties, "stretchy");
    $value[7] = hasProperty($properties, "separator");
    $value[8] = hasProperty($properties, "accent");
    $value[9] = hasProperty($properties, "fence");
    $value[10] = hasProperty($properties, "symmetric");
    $value[11] = ""; # we don't store "priority" in our dictionary
    $value[12] = ""; # we don't store "linebreakstyle" in our dictionary
    $value[13] = ($properties =~ m/^.*direction:([a-z]*)/) ? $1 : "";
    $value[14] = hasProperty($properties, "integral");
    $value[15] = hasProperty($properties, "mirrorable");

    # 1.3) save the key and value
    $moz_hash{$key} = [ @value ];
}

close($moz_file);

################################################################################
# 2) If mode "make-js", generate tests/stretchy-and-large-operators.js and quit.
#    If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

if ($mode eq "make-js") {
    print "generating file $FILE_JS...\n";
    open(my $file_js, ">", $FILE_JS) ||
        die ("Couldn't open $FILE_JS!");
    print $file_js "// This file is automatically generated. Do not edit.\n";
    print $file_js "var stretchy_and_large_operators = [";
    # Sorted so that the generated file does not depend on hash ordering.
    foreach my $key (sort keys %moz_hash) {
        my @moz = @{ $moz_hash{$key} };

        # Keys that do not look like operator.<codes>.<form> are skipped.
        next unless
            ($key =~ m/^operator\.([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
        my ($codes, $form) = ($1, $2);
        my $opname = "\\${codes}.${form}: ";

        if ($moz[4]) {
            # large operator
            print $file_js "['${opname}', '${codes}','l','${form}'],";
        }

        if ($moz[6]) {
            # stretchy operator: first letter of the direction ("h" or "v")
            my $direction = substr($moz[13], 0, 1);
            print $file_js
                "['${opname}', '${codes}','${direction}','${form}'],";
        }
    }
    print $file_js "];\n";
    close($file_js) || die ("Couldn't write $FILE_JS!");
    exit 0;
}

if ($mode eq "check") {
    print "checking operator dictionary...\n";
    open(my $file_syntax_errors, ">", $FILE_SYNTAX_ERRORS) ||
        die ("Couldn't open $FILE_SYNTAX_ERRORS!");

    my $nb_errors = 0;
    my $nb_warnings = 0;

    # check the validity of our private data
    foreach my $key (sort keys %moz_hash) {
        my @moz = @{ $moz_hash{$key} };
        my $entry = generateEntry($key, @moz);
        my $valid = 1;

        if (!($moz[13] eq "" ||
              $moz[13] eq "horizontal" ||
              $moz[13] eq "vertical")) {
            $valid = 0;
            $nb_errors++;
            print $file_syntax_errors
                "error: invalid direction \"$moz[13]\"\n";
        }

        if (!$moz[4] && $moz[14]) {
            $valid = 0;
            $nb_warnings++;
            print $file_syntax_errors
                "warning: operator is integral but not largeop\n";
        }

        if (($moz[0] =~ m/[iI]ntegral/) && !$moz[14]) {
            $valid = 0;
            $nb_warnings++;
            print $file_syntax_errors "warning: operator contains the term \"integral\" in its comment, but is not integral\n";
        }

        if (!$valid) {
            print $file_syntax_errors $entry;
            print $file_syntax_errors "\n";
        }
    }

    # check that all forms have the same direction.
    # The key list is computed once, so deleting entries while looping is safe.
    foreach my $key (sort keys %moz_hash) {
        # skip operators already handled through one of their other forms
        next unless exists($moz_hash{$key});

        next unless ($key =~ m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
        my $base = $1;

        # forms of this operator that are present in our dictionary
        my @form_keys = grep { exists($moz_hash{$_}) }
                        ("$base.prefix", "$base.infix", "$base.postfix");

        my %seen_direction;
        foreach my $form_key (@form_keys) {
            $seen_direction{ $moz_hash{$form_key}[13] } = 1;
        }

        if (scalar(keys %seen_direction) > 1) {
            $nb_errors++;
            print $file_syntax_errors
                "error: operator has a stretchy form, but all forms";
            print $file_syntax_errors
                " have not the same direction\n";
            foreach my $form_key (@form_keys) {
                print $file_syntax_errors
                    generateEntry($form_key, @{ $moz_hash{$form_key} });
            }
            print $file_syntax_errors "\n";
        }

        # Remove every form of this operator so it is reported only once.
        delete @moz_hash{@form_keys};
    }

    close($file_syntax_errors) ||
        die ("Couldn't write $FILE_SYNTAX_ERRORS!");
    print "\n";
    if ($nb_errors > 0 || $nb_warnings > 0) {
        print "$nb_errors error(s) found\n";
        print "$nb_warnings warning(s) found\n";
        print "See output file $FILE_SYNTAX_ERRORS.\n\n";
    } else {
        print "No error found.\n\n";
    }

    exit 0;
}

################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY

print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
my @wg_keys = ();   # keys in document order
my %wg_hash;

foreach my $node ($doc->findnodes('/root/entry')) {
    # 3.1) build the key
    my $key = "operator.";

    # The "unicode" attribute is a '-' separated list of code points, each
    # optionally prefixed by "U" and carrying a leading "0" that is dropped.
    my $codepoints = attributeOr($node, "unicode", "") . "-";
    while ($codepoints =~ m/^U?0(\w*)-(.*)$/) {
        # Concatenate \uNNNN
        $key = "$key\\u$1";
        $codepoints = $2;
    }

    my $form = attributeOr($node, "form", "");
    $key = "$key.$form";

    # 3.2) build the array
    my $properties = attributeOr($node, "properties", "");
    my @value = ();
    $value[0] = lc(attributeOr($node, "description", ""));
    $value[1] = attributeOr($node, "lspace", "5");
    $value[2] = attributeOr($node, "rspace", "5");
    $value[3] = attributeOr($node, "minsize", "1");
    $value[4] = hasProperty($properties, "largeop");
    $value[5] = hasProperty($properties, "movablelimits");
    $value[6] = hasProperty($properties, "stretchy");
    $value[7] = hasProperty($properties, "separator");
    $value[8] = hasProperty($properties, "accent");
    $value[9] = hasProperty($properties, "fence");
    $value[10] = hasProperty($properties, "symmetric");
    $value[11] = attributeOr($node, "priority", "");
    $value[12] = attributeOr($node, "linebreakstyle", "");

    # not stored in the WG dictionary
    $value[13] = ""; # direction
    $value[14] = ""; # integral

    $value[15] = hasProperty($properties, "mirrorable");

    # 3.3) save the key and value
    push(@wg_keys, $key);
    $wg_hash{$key} = [ @value ];
}

################################################################################
# 4) Compare the two dictionaries and output the result

print "comparing dictionaries...\n";
open(my $file_differences, ">", $FILE_DIFFERENCES) ||
    die ("Couldn't open $FILE_DIFFERENCES!");
open(my $file_new_dictionary, ">", $FILE_NEW_DICTIONARY) ||
    die ("Couldn't open $FILE_NEW_DICTIONARY!");

my $conflicting = 0; my $conflicting_stretching = 0;
my $new = 0;         my $new_stretching = 0;
my $obsolete = 0;    my $obsolete_stretching = 0;
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary, in document order
foreach my $key (@wg_keys) {
    # A key listed twice in the WG dictionary is processed only once.
    next unless exists($wg_hash{$key});

    my @wg = @{ delete $wg_hash{$key} };
    my $wg_value = generateCommon(@wg);

    if (exists($moz_hash{$key})) {
        # entry is in both dictionary
        my @moz = @{ delete $moz_hash{$key} };
        my $moz_value = generateCommon(@moz);
        if ($moz_value ne $wg_value) {
            # conflicting entry
            print $file_differences "[conflict]";
            $conflicting++;
            if ($moz[6] != $wg[6]) {
                print $file_differences "[stretching]";
                $conflicting_stretching++;
            }
            print $file_differences " - $key ($wg[0])\n";
            print $file_differences "-$moz_value\n+$wg_value\n\n";
        } else {
            # unchanged entry
            $unchanged++;
        }
        # In both cases the new entry takes the WG values plus our private data.
        print $file_new_dictionary
            completeCommon($wg_value, $key, @moz, @wg);
    } else {
        # we don't have this entry in our dictionary yet
        print $file_differences "[new entry]";
        $new++;
        if ($wg[6]) {
            print $file_differences "[stretching]";
            $new_stretching++;
        }
        print $file_differences " - $key ($wg[0])\n";
        print $file_differences "-\n+$wg_value\n\n";
        print $file_new_dictionary completeCommon($wg_value, $key, @wg);
    }
}

print $file_new_dictionary
    "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries
foreach my $key (sort keys %moz_hash) {
    my @moz = @{ $moz_hash{$key} };
    my $moz_value = generateCommon(@moz);
    print $file_differences "[obsolete entry]";
    $obsolete++;
    if ($moz[6]) {
        print $file_differences "[stretching]";
        $obsolete_stretching++;
    }
    print $file_differences " - $key ($moz[0])\n";
    print $file_differences "-$moz_value\n+\n\n";
    print $file_new_dictionary completeCommon($moz_value, $key, @moz);
}

close($file_differences) || die ("Couldn't write $FILE_DIFFERENCES!");
close($file_new_dictionary) || die ("Couldn't write $FILE_NEW_DICTIONARY!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;

################################################################################
sub usage {
    # display the accepted command syntax and quit
    print "usage:\n";
    print "  ./updateOperatorDictionary.pl download [unicode.xml]\n";
    print "  ./updateOperatorDictionary.pl compare [dictionary.xml]\n";
    print "  ./updateOperatorDictionary.pl check\n";
    print "  ./updateOperatorDictionary.pl make-js\n";
    print "  ./updateOperatorDictionary.pl clean\n";
    exit 0;
}

sub hasProperty {
    # helper: return 1 if $name occurs in the property string, 0 otherwise
    my($properties, $name) = @_;
    return (index($properties, $name) >= 0) ? 1 : 0;
}

sub attributeOr {
    # helper: return the value of attribute $name of the XML node $node, or
    # $default when the attribute is missing or empty
    my($node, $name, $default) = @_;
    my $value = $node->getAttribute($name);
    return (defined($value) && $value ne "") ? $value : $default;
}

sub generateCommon {
    # helper function to generate the string of data shared by both
    # dictionaries.
    # Argument: the 16 fields of one dictionary record (see table above).
    # Returns: "lspace:N rspace:N [minsize:N] [flags...]"; minsize is only
    # written when it differs from the default "1".
    my(@v) = @_;
    my $entry = "lspace:$v[1] rspace:$v[2]";
    if ($v[3] ne "1") { $entry = "$entry minsize:$v[3]"; }

    # [record index, property name], in output order
    my @flags = ([4, "largeop"], [5, "movablelimits"], [6, "stretchy"],
                 [7, "separator"], [8, "accent"], [9, "fence"],
                 [10, "symmetric"], [15, "mirrorable"]);
    foreach my $flag (@flags) {
        my($index, $name) = @{ $flag };
        if ($v[$index]) { $entry = "$entry $name"; }
    }
    return $entry;
}

sub completeCommon {
    # helper to add key and private data to generateCommon
    # Arguments:
    #   $entry    string produced by generateCommon
    #   $key      dictionary key
    #   @records  either one record (16 fields), used for the private data
    #             and for the description; or our record followed by the WG
    #             record (32 fields), in which case the private data come
    #             from ours and the WG description is only a fallback.
    # Returns: the complete dictionary line, newline included.
    my($entry, $key, @records) = @_;

    # Arrays are flattened by the call, so split the records by size.
    my $record_size = 16;
    my @v_moz = @records;
    my @v_wg = ();
    if (scalar(@v_moz) > $record_size) {
        @v_wg = splice(@v_moz, $record_size);
    }

    # generateCommon may already have written "mirrorable"; don't repeat it.
    my $has_mirrorable = ($entry =~ m/\bmirrorable\b/);

    $entry = "$key = $entry";

    if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }
    if ($v_moz[14]) { $entry = "$entry integral"; }
    if ($v_moz[15] && !$has_mirrorable) { $entry = "$entry mirrorable"; }

    if ($v_moz[0]) {
        # keep our previous comment
        $entry = "$entry # $v_moz[0]";
    } else {
        # otherwise use the description given by the WG
        my $description = defined($v_wg[0]) ? $v_wg[0] : "";
        $entry = "$entry # $description";
    }

    $entry = "$entry\n";
    return $entry;
}

sub generateEntry {
    # helper function to generate an entry of our operator dictionary
    # Arguments: the key, then the 16 fields of our record.
    my($key, @moz) = @_;
    my $entry = generateCommon(@moz);
    $entry = completeCommon($entry, $key, @moz, @moz);
    return $entry;
}