layout/mathml/updateOperatorDictionary.pl

Wed, 31 Dec 2014 06:55:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:50 +0100
changeset 2
7e26c7da4463
permissions
-rwxr-xr-x

Added tag UPSTREAM_283F7C6 for changeset ca08bd8f51b2

     1 #!/usr/bin/perl
     2 # -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
     3 # This Source Code Form is subject to the terms of the Mozilla Public
     4 # License, v. 2.0. If a copy of the MPL was not distributed with this
     5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     7 use XML::LibXSLT;
     8 use XML::LibXML;
     9 use LWP::Simple;
# Output files written by this script. The first five are the ones removed
# again by the "clean" mode; $FILE_JS is written by the "make-js" mode.
$FILE_UNICODE = "unicode.xml";
$FILE_DICTIONARY = "dictionary.xml";
$FILE_DIFFERENCES = "differences.txt";
$FILE_NEW_DICTIONARY = "new_dictionary.txt";
$FILE_SYNTAX_ERRORS = "syntax_errors.txt";
$FILE_JS = "tests/stretchy-and-large-operators.js";
# Our dictionary (property file). It is only read by this script, never
# written.
$MOZ_DICTIONARY = "mathfont.properties";
# Dictionary provided by the W3C in "XML Entity Definitions for Characters".
# The "download" mode accepts an alternative URL as its optional argument.
$WG_DICTIONARY_URL = "http://www.w3.org/2003/entities/2007xml/unicode.xml";
# XSL stylesheet used to extract the relevant data from the W3C dictionary.
$DICTIONARY_XSL = "operatorDictionary.xsl";
# Dictionary provided by the W3C, transformed with operatorDictionary.xsl.
# The "compare" mode accepts an alternative file as its optional argument.
$WG_DICTIONARY = $FILE_DICTIONARY;
    31 if (!($#ARGV >= 0 &&
    32       ((($ARGV[0] eq "download") && $#ARGV <= 1) ||
    33        (($ARGV[0] eq "compare") && $#ARGV <= 1) ||
    34        (($ARGV[0] eq "check") && $#ARGV <= 0) ||
    35        (($ARGV[0] eq "make-js") && $#ARGV <= 0) ||
    36        (($ARGV[0] eq "clean") && $#ARGV <= 0)))) {
    37     &usage;
    38 }
    40 if ($ARGV[0] eq "download") {
    41     if ($#ARGV == 1) {
    42         $WG_DICTIONARY_URL = $ARGV[1];
    43     }
    44     print "Downloading $WG_DICTIONARY_URL...\n";
    45     getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
    47     print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
    48     my $xslt = XML::LibXSLT->new();
    49     my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
    50     my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
    51                                           no_cdata=>1);
    52     my $stylesheet = $xslt->parse_stylesheet($style_doc);
    53     my $results = $stylesheet->transform($source);
    54     open($file, ">$FILE_DICTIONARY") || die ("Couldn't open $FILE_DICTIONARY!");
    55     print $file $stylesheet->output_as_bytes($results);
    56     close($file);
    57     exit 0;
    58 }
    60 if ($ARGV[0] eq "clean") {
    61     unlink($FILE_UNICODE,
    62            $FILE_DICTIONARY,
    63            $FILE_DIFFERENCES,
    64            $FILE_NEW_DICTIONARY,
    65            $FILE_SYNTAX_ERRORS);
    66     exit 0;
    67 }
    69 if ($ARGV[0] eq "compare" && $#ARGV == 1) {
    70     $WG_DICTIONARY = $ARGV[1];
    71 }
    73 ################################################################################
    74 # structure of the dictionary used by this script:
    75 # - key: same as in mathfont.properties
    76 # - table:
    77 #    index | value
    78 #      0   | description
    79 #      1   | lspace
    80 #      2   | rspace
    81 #      3   | minsize
    82 #      4   | largeop
    83 #      5   | movablelimits
    84 #      6   | stretchy
    85 #      7   | separator
    86 #      8   | accent
    87 #      9   | fence
    88 #     10   | symmetric
    89 #     11   | priority
    90 #     12   | linebreakstyle
    91 #     13   | direction
    92 #     14   | integral
    93 #     15   | mirrorable
    95 # 1) build %moz_hash from $MOZ_DICTIONARY
    97 print "loading $MOZ_DICTIONARY...\n";
    98 open($file, $MOZ_DICTIONARY) || die ("Couldn't open $MOZ_DICTIONARY!");
   100 print "building dictionary...\n";
   101 while (<$file>) {
   102     next unless (m/^operator\.(.*)$/);
   103     (m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/);
   105     # 1.1) build the key
   106     $key = $1;
   108     # 1.2) build the array
   109     $_ = $2;
   110     @value = ();
   111     $value[0] = $3;
   112     if (m/^(.*)lspace:(\d)(.*)$/) { $value[1] = $2; } else { $value[1] = "5"; }
   113     if (m/^(.*)rspace:(\d)(.*)$/) { $value[2] = $2; } else { $value[2] = "5"; }
   114     if (m/^(.*)minsize:(\d)(.*)$/) { $value[3] = $2; } else { $value[3] = "1"; }
   115     $value[4] = (m/^(.*)largeop(.*)$/);
   116     $value[5] = (m/^(.*)movablelimits(.*)$/);
   117     $value[6] = (m/^(.*)stretchy(.*)$/);
   118     $value[7] = (m/^(.*)separator(.*)$/);
   119     $value[8] = (m/^(.*)accent(.*)$/);
   120     $value[9] = (m/^(.*)fence(.*)$/);
   121     $value[10] = (m/^(.*)symmetric(.*)$/);
   122     $value[11] = ""; # we don't store "priority" in our dictionary
   123     $value[12] = ""; # we don't store "linebreakstyle" in our dictionary
   124     if (m/^(.*)direction:([a-z]*)(.*)$/) { $value[13] = $2; }
   125     else { $value[13] = ""; }
   126     $value[14] = (m/^(.*)integral(.*)$/);
   127     $value[15] = (m/^(.*)mirrorable(.*)$/);
   129     # 1.3) save the key and value
   130     $moz_hash{$key} = [ @value ];
   131 }
   133 close($file);
   135 ################################################################################
   136 # 2) If mode "make-js", generate tests/stretchy-and-large-operators.js and quit.
   137 #    If mode "check", verify validity of our operator dictionary and quit.
   138 #    If mode "compare", go to step 3)
   140 if ($ARGV[0] eq "make-js") {
   141     print "generating file $FILE_JS...\n";
   142     open($file_js, ">$FILE_JS") ||
   143         die ("Couldn't open $FILE_JS!");
   144     print $file_js "// This file is automatically generated. Do not edit.\n";
   145     print $file_js "var stretchy_and_large_operators = [";
   146     @moz_keys = (keys %moz_hash);
   147     while ($key = pop(@moz_keys)) {
   148         @moz = @{ $moz_hash{$key} };
   150         $_ = $key;
   151         (m/^operator\.([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
   152         $opname = "\\$1.$2: ";
   154         if (@moz[4]) {
   155             print $file_js "['$opname', '$1','l','$2'],";
   156         }
   158         if (@moz[6]) {
   159             $_ = substr(@moz[13], 0, 1);
   160             print $file_js "['$opname', '$1','$_','$2'],";
   161         }
   162     }
   163     print $file_js "];\n";
   164     close($file_js);
   165     exit 0;
   166 }
   168 if ($ARGV[0] eq "check") {
   169     print "checking operator dictionary...\n";
   170     open($file_syntax_errors, ">$FILE_SYNTAX_ERRORS") ||
   171         die ("Couldn't open $FILE_SYNTAX_ERRORS!");
   173     $nb_errors = 0;
   174     $nb_warnings = 0;
   175     @moz_keys = (keys %moz_hash);
   176     # check the validity of our private data
   177     while ($key = pop(@moz_keys)) {
   178         @moz = @{ $moz_hash{$key} };
   179         $entry = &generateEntry($key, @moz);
   180         $valid = 1;
   182         if (!(@moz[13] eq "" ||
   183               @moz[13] eq "horizontal" ||
   184               @moz[13] eq "vertical")) {
   185             $valid = 0;
   186             $nb_errors++;
   187             print $file_syntax_errors "error: invalid direction \"$moz[13]\"\n";
   188         }
   190         if (!@moz[4] && @moz[14]) {
   191             $valid = 0;
   192             $nb_warnings++;
   193             print $file_syntax_errors "warning: operator is integral but not largeop\n";
   194         }
   196         $_ = @moz[0];
   197         if ((m/^(.*)[iI]ntegral(.*)$/) && !@moz[14]) {
   198             $valid = 0;
   199             $nb_warnings++;
   200             print $file_syntax_errors "warning: operator contains the term \"integral\" in its comment, but is not integral\n";
   201         }
   203         if (!$valid) {
   204             print $file_syntax_errors $entry;
   205             print $file_syntax_errors "\n";
   206         }
   207     }
   209     # check that all forms have the same direction.
   210     @moz_keys = (keys %moz_hash);
   211     while ($key = pop(@moz_keys)) {
   213         if (@{ $moz_hash{$key} }) {
   214             # the operator has not been removed from the hash table yet.
   216             $_ = $key;
   217             (m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
   218             $key_prefix = "$1.prefix";
   219             $key_infix = "$1.infix";
   220             $key_postfix = "$1.postfix";
   221             @moz_prefix = @{ $moz_hash{$key_prefix} };
   222             @moz_infix = @{ $moz_hash{$key_infix} };
   223             @moz_postfix = @{ $moz_hash{$key_postfix} };
   225             $same_direction = 1;
   227             if (@moz_prefix) {
   228                 if (@moz_infix &&
   229                     !($moz_infix[13] eq $moz_prefix[13])) {
   230                     $same_direction = 0;
   231                 }
   232                 if (@moz_postfix &&
   233                     !($moz_postfix[13] eq $moz_prefix[13])) {
   234                     $same_direction = 0;
   235                 }
   236             }
   237             if (@moz_infix) {
   238                 if (@moz_postfix &&
   239                     !($moz_postfix[13] eq $moz_infix[13])) {
   240                     $same_direction = 0;
   241                 }
   242             }
   244             if (!$same_direction) {
   245                 $nb_errors++;
   246                 print  $file_syntax_errors
   247                     "error: operator has a stretchy form, but all forms";
   248                 print  $file_syntax_errors
   249                     " have not the same direction\n";
   250                 if (@moz_prefix) {
   251                     $_ = &generateEntry($key_prefix, @moz_prefix);
   252                     print $file_syntax_errors $_;
   253                 }
   254                 if (@moz_infix) {
   255                     $_ = &generateEntry($key_infix, @moz_infix);
   256                     print $file_syntax_errors $_;
   257                 }
   258                 if (@moz_postfix) {
   259                     $_ = &generateEntry($key_postfix, @moz_postfix);
   260                     print $file_syntax_errors $_;
   261                 }
   262                 print $file_syntax_errors "\n";
   263             }
   265             if (@moz_prefix) {
   266                 delete $moz_hash{$key.prefix};
   267             }
   268             if (@moz_infix) {
   269                 delete $moz_hash{$key_infix};
   270             }
   271             if (@moz_postfix) {
   272                 delete $moz_hash{$key_postfix};
   273             }
   274         }
   275     }
   277     close($file_syntax_errors);
   278     print "\n";
   279     if ($nb_errors > 0 || $nb_warnings > 0) {
   280         print "$nb_errors error(s) found\n";
   281         print "$nb_warnings warning(s) found\n";
   282         print "See output file $FILE_SYNTAX_ERRORS.\n\n";
   283     } else {
   284         print "No error found.\n\n";
   285     }
   287     exit 0;
   288 }
   290 ################################################################################
   291 # 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY
   293 print "loading $WG_DICTIONARY...\n";
   294 my $parser = XML::LibXML->new();
   295 my $doc = $parser->parse_file($WG_DICTIONARY);
   297 print "building dictionary...\n";
   298 @wg_keys = ();
   300 foreach my $entry ($doc->findnodes('/root/entry')) {
   301     # 3.1) build the key
   302     $key = "operator.";
   304     $_ = $entry->getAttribute("unicode");
   305     $_ = "$_-";
   306     while (m/^U?0(\w*)-(.*)$/) {
   307         # Concatenate .\uNNNN
   308         $key = "$key\\u$1";
   309         $_ = $2;
   310     }
   312     $_ = $entry->getAttribute("form"); # "Form"
   313     $key = "$key.$_";
   315     # 3.2) build the array
   316     @value = ();
   317     $value[0] = lc($entry->getAttribute("description"));
   318     $value[1] = $entry->getAttribute("lspace");
   319     if ($value[1] eq "") { $value[1] = "5"; }
   320     $value[2] = $entry->getAttribute("rspace");
   321     if ($value[2] eq "") { $value[2] = "5"; }
   322     $value[3] = $entry->getAttribute("minsize");
   323     if ($value[3] eq "") { $value[3] = "1"; }
   325     $_ = $entry->getAttribute("properties");
   326     $value[4] = (m/^(.*)largeop(.*)$/);
   327     $value[5] = (m/^(.*)movablelimits(.*)$/);
   328     $value[6] = (m/^(.*)stretchy(.*)$/);
   329     $value[7] = (m/^(.*)separator(.*)$/);
   330     $value[8] = (m/^(.*)accent(.*)$/);
   331     $value[9] = (m/^(.*)fence(.*)$/);
   332     $value[10] = (m/^(.*)symmetric(.*)$/);
   333     $value[15] = (m/^(.*)mirrorable(.*)$/);
   334     $value[11] = $entry->getAttribute("priority");
   335     $value[12] = $entry->getAttribute("linebreakstyle");
   337     # not stored in the WG dictionary
   338     $value[13] = ""; # direction
   339     $value[14] = ""; # integral
   341     # 3.3) save the key and value
   342     push(@wg_keys, $key);
   343     $wg_hash{$key} = [ @value ];
   344 }
   345 @wg_keys = reverse(@wg_keys);
   347 ################################################################################
   348 # 4) Compare the two dictionaries and output the result
   350 print "comparing dictionaries...\n";
   351 open($file_differences, ">$FILE_DIFFERENCES") ||
   352     die ("Couldn't open $FILE_DIFFERENCES!");
   353 open($file_new_dictionary, ">$FILE_NEW_DICTIONARY") ||
   354     die ("Couldn't open $FILE_NEW_DICTIONARY!");
   356 $conflicting = 0; $conflicting_stretching = 0;
   357 $new = 0; $new_stretching = 0;
   358 $obsolete = 0; $obsolete_stretching = 0;
   359 $unchanged = 0;
   361 # 4.1) look to the entries of the WG dictionary
   362 while ($key = pop(@wg_keys)) {
   364     @wg = @{ $wg_hash{$key} };
   365     delete $wg_hash{$key};
   366     $wg_value = &generateCommon(@wg);
   368     if (exists($moz_hash{$key})) {
   369         # entry is in both dictionary
   370         @moz = @{ $moz_hash{$key} };
   371         delete $moz_hash{$key};
   372         $moz_value = &generateCommon(@moz);
   373         if ($moz_value ne $wg_value) {
   374             # conflicting entry
   375             print $file_differences "[conflict]";
   376             $conflicting++;
   377             if ($moz[6] != $wg[6]) {
   378                 print $file_differences "[stretching]";
   379                 $conflicting_stretching++;
   380             }
   381             print $file_differences " - $key ($wg[0])\n";
   382             print $file_differences "-$moz_value\n+$wg_value\n\n";
   383             $_ = &completeCommon($wg_value, $key, @moz, @wg);
   384             print $file_new_dictionary $_;
   385         } else {
   386             # unchanged entry
   387             $unchanged++;
   388             $_ = &completeCommon($wg_value, $key, @moz, @wg);
   389             print $file_new_dictionary $_;
   390         }
   391     } else {
   392         # we don't have this entry in our dictionary yet
   393         print $file_differences "[new entry]";
   394         $new++;
   395         if ($wg[6]) {
   396             print $file_differences "[stretching]";
   397             $new_stretching++;
   398         }
   399         print $file_differences " - $key ($wg[0])\n";
   400         print $file_differences "-\n+$wg_value\n\n";
   401         $_ = &completeCommon($wg_value, $key, (), @wg);
   402         print $file_new_dictionary $_;
   403     }
   404 }
   406 print $file_new_dictionary
   407     "\n# Entries below are not part of the official MathML dictionary\n\n";
   408 # 4.2) look in our dictionary the remaining entries
   409 @moz_keys = (keys %moz_hash);
   410 @moz_keys = reverse(sort(@moz_keys));
   412 while ($key = pop(@moz_keys)) {
   413     @moz = @{ $moz_hash{$key} };
   414     $moz_value = &generateCommon(@moz);
   415     print $file_differences "[obsolete entry]";
   416     $obsolete++;
   417     if ($moz[6]) {
   418         print $file_differences "[stretching]";
   419         $obsolete_stretching++;
   420     }
   421     print $file_differences " - $key ($moz[0])\n";
   422     print $file_differences "-$moz_value\n+\n\n";
   423     $_ = &completeCommon($moz_value, $key, (), @moz);
   424     print $file_new_dictionary $_;
   425 }
   427 close($file_differences);
   428 close($file_new_dictionary);
   430 print "\n";
   431 print "- $obsolete obsolete entries ";
   432 print "($obsolete_stretching of them are related to stretching)\n";
   433 print "- $unchanged unchanged entries\n";
   434 print "- $conflicting conflicting entries ";
   435 print "($conflicting_stretching of them are related to stretching)\n";
   436 print "- $new new entries ";
   437 print "($new_stretching of them are related to stretching)\n";
   438 print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
   439 print "After having modified the dictionary, please run";
   440 print "./updateOperatorDictionary check\n\n";
   441 exit 0;
   443 ################################################################################
   444 sub usage {
   445     # display the accepted command syntax and quit
   446     print "usage:\n";
   447     print "  ./updateOperatorDictionary.pl download [unicode.xml]\n";
   448     print "  ./updateOperatorDictionary.pl compare [dictionary.xml]\n";
   449     print "  ./updateOperatorDictionary.pl check\n";
   450     print "  ./updateOperatorDictionary.pl make-js\n";
   451     print "  ./updateOperatorDictionary.pl clean\n";
   452     exit 0;
   453 }
   455 sub generateCommon {
   456     # helper function to generate the string of data shared by both dictionaries
   457     my(@v) = @_;
   458     $entry = "lspace:$v[1] rspace:$v[2]";
   459     if ($v[3] ne "1") { $entry = "$entry minsize:$v[3]"; }
   460     if ($v[4]) { $entry = "$entry largeop"; }
   461     if ($v[5]) { $entry = "$entry movablelimits"; }
   462     if ($v[6]) { $entry = "$entry stretchy"; }
   463     if ($v[7]) { $entry = "$entry separator"; }
   464     if ($v[8]) { $entry = "$entry accent"; }
   465     if ($v[9]) { $entry = "$entry fence"; }
   466     if ($v[10]) { $entry = "$entry symmetric"; }
   467     if ($v[15]) { $entry = "$entry mirrorable"; }
   468     return $entry;
   469 }
   471 sub completeCommon {
   472     # helper to add key and private data to generateCommon
   473     my($entry, $key, @v_moz, @v_wg) = @_;
   475     $entry = "$key = $entry";
   477     if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }
   478     if ($v_moz[14]) { $entry = "$entry integral"; }
   479     if ($v_moz[15]) { $entry = "$entry mirrorable"; }
   481     if ($v_moz[0]) {
   482         # keep our previous comment
   483         $entry = "$entry # $v_moz[0]";
   484     } else {
   485         # otherwise use the description given by the WG
   486         $entry = "$entry # $v_wg[0]";
   487     }
   489     $entry = "$entry\n";
   490     return $entry;
   491 }
   493 sub generateEntry {
   494     # helper function to generate an entry of our operator dictionary
   495     my($key, @moz) = @_;
   496     $entry = &generateCommon(@moz);
   497     $entry = &completeCommon($entry, $key, @moz, @moz);
   498     return $entry;
   499 }

mercurial