#!/usr/bin/perl
# -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Maintenance tool for the MathML operator dictionary (mathfont.properties).
#
# Modes (first command-line argument):
#   download [url]   fetch the W3C unicode.xml and transform it into
#                    dictionary.xml with operatorDictionary.xsl
#   compare [file]   compare mathfont.properties with the transformed W3C
#                    dictionary; write differences.txt and new_dictionary.txt
#   check            sanity-check mathfont.properties; write syntax_errors.txt
#   make-js          generate tests/stretchy-and-large-operators.js
#   clean            remove the files generated by the other modes

use strict;
use warnings;

use XML::LibXSLT;
use XML::LibXML;
use LWP::Simple;

# output files
my $FILE_UNICODE = "unicode.xml";
my $FILE_DICTIONARY = "dictionary.xml";
my $FILE_DIFFERENCES = "differences.txt";
my $FILE_NEW_DICTIONARY = "new_dictionary.txt";
my $FILE_SYNTAX_ERRORS = "syntax_errors.txt";
my $FILE_JS = "tests/stretchy-and-large-operators.js";

# our dictionary (property file)
my $MOZ_DICTIONARY = "mathfont.properties";

# dictionary provided by the W3C in "XML Entity Definitions for Characters"
my $WG_DICTIONARY_URL = "http://www.w3.org/2003/entities/2007xml/unicode.xml";

# XSL stylesheet to extract relevant data from the dictionary
my $DICTIONARY_XSL = "operatorDictionary.xsl";

# dictionary provided by the W3C transformed with operatorDictionary.xsl
my $WG_DICTIONARY = $FILE_DICTIONARY;

################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table: one array per key, indexed by the constants below
use constant {
  DESCRIPTION    => 0,
  LSPACE         => 1,
  RSPACE         => 2,
  MINSIZE        => 3,
  LARGEOP        => 4,
  MOVABLELIMITS  => 5,
  STRETCHY       => 6,
  SEPARATOR      => 7,
  ACCENT         => 8,
  FENCE          => 9,
  SYMMETRIC      => 10,
  PRIORITY       => 11,
  LINEBREAKSTYLE => 12,
  DIRECTION      => 13,
  INTEGRAL       => 14,
  MIRRORABLE     => 15,
};

# Boolean properties shared by both dictionaries, in the order in which they
# are written out by generateCommon.
my @COMMON_FLAGS = (
  [ LARGEOP,       "largeop" ],
  [ MOVABLELIMITS, "movablelimits" ],
  [ STRETCHY,      "stretchy" ],
  [ SEPARATOR,     "separator" ],
  [ ACCENT,        "accent" ],
  [ FENCE,         "fence" ],
  [ SYMMETRIC,     "symmetric" ],
  [ MIRRORABLE,    "mirrorable" ],
);

# Accepted modes and the maximum number of extra arguments each one takes.
my %MAX_EXTRA_ARGS = (
  "download" => 1,
  "compare"  => 1,
  "check"    => 0,
  "make-js"  => 0,
  "clean"    => 0,
);

my ($mode, @mode_args) = @ARGV;
if (!defined($mode) ||
    !exists($MAX_EXTRA_ARGS{$mode}) ||
    @mode_args > $MAX_EXTRA_ARGS{$mode}) {
  usage();
}

if ($mode eq "download") {
  if (@mode_args) {
    $WG_DICTIONARY_URL = $mode_args[0];
  }
  print "Downloading $WG_DICTIONARY_URL...\n";
  # getstore returns the HTTP status code; stop here on failure instead of
  # letting the XML parser choke on a missing or partial file.
  my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
  is_success($status) ||
    die("Couldn't download $WG_DICTIONARY_URL (HTTP status $status)!");

  print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
  my $xslt = XML::LibXSLT->new();
  my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
  my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                        no_cdata => 1);
  my $stylesheet = $xslt->parse_stylesheet($style_doc);
  my $results = $stylesheet->transform($source);
  open(my $file, '>', $FILE_DICTIONARY) ||
    die("Couldn't open $FILE_DICTIONARY!");
  binmode($file); # output_as_bytes returns already-encoded bytes
  print {$file} $stylesheet->output_as_bytes($results);
  close($file) || die("Couldn't write $FILE_DICTIONARY!");
  exit 0;
}

if ($mode eq "clean") {
  unlink($FILE_UNICODE,
         $FILE_DICTIONARY,
         $FILE_DIFFERENCES,
         $FILE_NEW_DICTIONARY,
         $FILE_SYNTAX_ERRORS);
  exit 0;
}

if ($mode eq "compare" && @mode_args) {
  $WG_DICTIONARY = $mode_args[0];
}

################################################################################
# 1) build %moz_hash from $MOZ_DICTIONARY

my %moz_hash;

print "loading $MOZ_DICTIONARY...\n";
open(my $moz_file, '<', $MOZ_DICTIONARY) ||
  die("Couldn't open $MOZ_DICTIONARY!");

print "building dictionary...\n";
while (my $line = <$moz_file>) {
  next unless ($line =~ m/^operator\./);

  # 1.1) split the line into key, properties and comment
  my ($key, $properties, $description) =
    ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/);
  if (!defined($key)) {
    # Without this guard a malformed line would be stored under a key built
    # from stale capture groups.
    warn("skipping malformed entry at $MOZ_DICTIONARY line $.\n");
    next;
  }

  # 1.2) build the array
  my @value = parseFlags($properties);
  $value[DESCRIPTION] = $description;
  # The leading greedy .* keeps the last occurrence if a property is repeated.
  $value[LSPACE] = ($properties =~ m/^.*lspace:(\d)/) ? $1 : "5";
  $value[RSPACE] = ($properties =~ m/^.*rspace:(\d)/) ? $1 : "5";
  $value[MINSIZE] = ($properties =~ m/^.*minsize:(\d)/) ? $1 : "1";
  $value[PRIORITY] = ""; # we don't store "priority" in our dictionary
  $value[LINEBREAKSTYLE] = ""; # we don't store "linebreakstyle" either
  $value[DIRECTION] =
    ($properties =~ m/^.*direction:([a-z]*)/) ? $1 : "";
  $value[INTEGRAL] = hasFlag($properties, "integral");

  # 1.3) save the key and value
  $moz_hash{$key} = [ @value ];
}

close($moz_file);

################################################################################
# 2) If mode "make-js", generate tests/stretchy-and-large-operators.js and quit.
#    If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

if ($mode eq "make-js") {
  print "generating file $FILE_JS...\n";
  open(my $file_js, '>', $FILE_JS) ||
    die("Couldn't open $FILE_JS!");
  print {$file_js} "// This file is automatically generated. Do not edit.\n";
  print {$file_js} "var stretchy_and_large_operators = [";
  # Sorted so that the generated file is identical from one run to the next.
  foreach my $key (sort(keys(%moz_hash))) {
    my @moz = @{ $moz_hash{$key} };

    my ($codepoints, $form) =
      ($key =~ m/^operator\.([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
    next unless defined($codepoints);
    my $opname = "\\$codepoints.$form: ";

    if ($moz[LARGEOP]) {
      print {$file_js} "['$opname', '$codepoints','l','$form'],";
    }

    if ($moz[STRETCHY]) {
      # first letter of the direction: 'h' or 'v'
      my $direction = substr($moz[DIRECTION], 0, 1);
      print {$file_js} "['$opname', '$codepoints','$direction','$form'],";
    }
  }
  print {$file_js} "];\n";
  close($file_js) || die("Couldn't write $FILE_JS!");
  exit 0;
}

if ($mode eq "check") {
  print "checking operator dictionary...\n";
  open(my $file_syntax_errors, '>', $FILE_SYNTAX_ERRORS) ||
    die("Couldn't open $FILE_SYNTAX_ERRORS!");

  my $nb_errors = 0;
  my $nb_warnings = 0;
  my %valid_direction = ("" => 1, "horizontal" => 1, "vertical" => 1);

  # check the validity of our private data
  foreach my $key (sort(keys(%moz_hash))) {
    my @moz = @{ $moz_hash{$key} };
    my $valid = 1;

    if (!$valid_direction{$moz[DIRECTION]}) {
      $valid = 0;
      $nb_errors++;
      print {$file_syntax_errors}
        "error: invalid direction \"" . $moz[DIRECTION] . "\"\n";
    }

    if (!$moz[LARGEOP] && $moz[INTEGRAL]) {
      $valid = 0;
      $nb_warnings++;
      print {$file_syntax_errors}
        "warning: operator is integral but not largeop\n";
    }

    if ($moz[DESCRIPTION] =~ m/[iI]ntegral/ && !$moz[INTEGRAL]) {
      $valid = 0;
      $nb_warnings++;
      print {$file_syntax_errors} "warning: operator contains the term \"integral\" in its comment, but is not integral\n";
    }

    if (!$valid) {
      print {$file_syntax_errors} generateEntry($key, @moz);
      print {$file_syntax_errors} "\n";
    }
  }

  # check that all forms have the same direction.
  my %checked_operator;
  foreach my $key (sort(keys(%moz_hash))) {
    my ($operator) = ($key =~ m/^([\w|\.|\\]*)\.(?:prefix|infix|postfix)$/);
    next unless defined($operator);

    # each operator is examined once, whichever of its forms comes first
    next if $checked_operator{$operator}++;

    my @form_keys = grep { exists($moz_hash{$_}) }
                    ("$operator.prefix", "$operator.infix",
                     "$operator.postfix");

    # All forms agree exactly when at most one distinct direction is used.
    my %direction_used;
    foreach my $form_key (@form_keys) {
      $direction_used{ $moz_hash{$form_key}[DIRECTION] } = 1;
    }
    next if (keys(%direction_used) <= 1);

    $nb_errors++;
    print {$file_syntax_errors}
      "error: operator has a stretchy form, but all forms";
    print {$file_syntax_errors}
      " have not the same direction\n";
    foreach my $form_key (@form_keys) {
      print {$file_syntax_errors}
        generateEntry($form_key, @{ $moz_hash{$form_key} });
    }
    print {$file_syntax_errors} "\n";
  }

  close($file_syntax_errors) || die("Couldn't write $FILE_SYNTAX_ERRORS!");
  print "\n";
  if ($nb_errors > 0 || $nb_warnings > 0) {
    print "$nb_errors error(s) found\n";
    print "$nb_warnings warning(s) found\n";
    print "See output file $FILE_SYNTAX_ERRORS.\n\n";
  } else {
    print "No error found.\n\n";
  }

  exit 0;
}

################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY

print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
my @wg_keys; # keys in document order, each listed once
my %wg_hash;

foreach my $entry ($doc->findnodes('/root/entry')) {
  # 3.1) build the key
  my $key = "operator.";

  # "unicode" is a dash-separated list of code points such as U02212 or
  # U0003C-00338; each one contributes a \uNNNN escape to the key.
  my $remaining = attributeOr($entry, "unicode", "") . "-";
  while (my ($code, $rest) = ($remaining =~ m/^U?0(\w*)-(.*)$/)) {
    # Concatenate .\uNNNN
    $key = "$key\\u$code";
    $remaining = $rest;
  }

  $key = "$key." . attributeOr($entry, "form", "");

  # 3.2) build the array
  my @value = parseFlags(attributeOr($entry, "properties", ""));
  $value[DESCRIPTION] = lc(attributeOr($entry, "description", ""));
  $value[LSPACE] = attributeOr($entry, "lspace", "5");
  $value[RSPACE] = attributeOr($entry, "rspace", "5");
  $value[MINSIZE] = attributeOr($entry, "minsize", "1");
  $value[PRIORITY] = attributeOr($entry, "priority", "");
  $value[LINEBREAKSTYLE] = attributeOr($entry, "linebreakstyle", "");

  # not stored in the WG dictionary
  $value[DIRECTION] = "";
  $value[INTEGRAL] = "";

  # 3.3) save the key and value (a repeated key keeps its first position
  # but takes the values of its last occurrence)
  push(@wg_keys, $key) unless exists($wg_hash{$key});
  $wg_hash{$key} = [ @value ];
}

################################################################################
# 4) Compare the two dictionaries and output the result

print "comparing dictionaries...\n";
open(my $file_differences, '>', $FILE_DIFFERENCES) ||
  die("Couldn't open $FILE_DIFFERENCES!");
open(my $file_new_dictionary, '>', $FILE_NEW_DICTIONARY) ||
  die("Couldn't open $FILE_NEW_DICTIONARY!");

my ($conflicting, $conflicting_stretching) = (0, 0);
my ($new, $new_stretching) = (0, 0);
my ($obsolete, $obsolete_stretching) = (0, 0);
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
foreach my $key (@wg_keys) {
  my @wg = @{ $wg_hash{$key} };
  my $wg_value = generateCommon(@wg);

  if (exists($moz_hash{$key})) {
    # entry is in both dictionaries; removing it from %moz_hash leaves only
    # the obsolete entries for step 4.2
    my @moz = @{ delete($moz_hash{$key}) };
    my $moz_value = generateCommon(@moz);
    if ($moz_value ne $wg_value) {
      # conflicting entry
      print {$file_differences} "[conflict]";
      $conflicting++;
      if ($moz[STRETCHY] xor $wg[STRETCHY]) {
        print {$file_differences} "[stretching]";
        $conflicting_stretching++;
      }
      print {$file_differences} " - $key (" . $wg[DESCRIPTION] . ")\n";
      print {$file_differences} "-$moz_value\n+$wg_value\n\n";
    } else {
      # unchanged entry
      $unchanged++;
    }
    print {$file_new_dictionary}
      completeCommon($wg_value, $key, \@moz, \@wg);
  } else {
    # we don't have this entry in our dictionary yet
    print {$file_differences} "[new entry]";
    $new++;
    if ($wg[STRETCHY]) {
      print {$file_differences} "[stretching]";
      $new_stretching++;
    }
    print {$file_differences} " - $key (" . $wg[DESCRIPTION] . ")\n";
    print {$file_differences} "-\n+$wg_value\n\n";
    print {$file_new_dictionary} completeCommon($wg_value, $key, [], \@wg);
  }
}

print {$file_new_dictionary}
  "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries
foreach my $key (sort(keys(%moz_hash))) {
  my @moz = @{ $moz_hash{$key} };
  my $moz_value = generateCommon(@moz);
  print {$file_differences} "[obsolete entry]";
  $obsolete++;
  if ($moz[STRETCHY]) {
    print {$file_differences} "[stretching]";
    $obsolete_stretching++;
  }
  print {$file_differences} " - $key (" . $moz[DESCRIPTION] . ")\n";
  print {$file_differences} "-$moz_value\n+\n\n";
  # our own data is the only source for an obsolete entry
  print {$file_new_dictionary} completeCommon($moz_value, $key, \@moz, \@moz);
}

close($file_differences) || die("Couldn't write $FILE_DIFFERENCES!");
close($file_new_dictionary) || die("Couldn't write $FILE_NEW_DICTIONARY!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;

################################################################################
sub usage {
  # display the accepted command syntax and quit
  print "usage:\n";
  print " ./updateOperatorDictionary.pl download [unicode.xml]\n";
  print " ./updateOperatorDictionary.pl compare [dictionary.xml]\n";
  print " ./updateOperatorDictionary.pl check\n";
  print " ./updateOperatorDictionary.pl make-js\n";
  print " ./updateOperatorDictionary.pl clean\n";
  exit 0;
}

sub hasFlag {
  # helper returning 1 if $name occurs anywhere in the property string
  # (plain substring test), and "" otherwise
  my ($properties, $name) = @_;
  return ($properties =~ m/\Q$name\E/) ? 1 : "";
}

sub parseFlags {
  # helper returning a dictionary array in which only the boolean
  # properties listed in @COMMON_FLAGS are filled in from $properties
  my ($properties) = @_;
  my @value;
  foreach my $flag (@COMMON_FLAGS) {
    my ($index, $name) = @{$flag};
    $value[$index] = hasFlag($properties, $name);
  }
  return @value;
}

sub attributeOr {
  # helper returning attribute $name of $node, or $default when the
  # attribute is missing or empty
  my ($node, $name, $default) = @_;
  my $value = $node->getAttribute($name);
  return (defined($value) && $value ne "") ? $value : $default;
}

sub generateCommon {
  # helper function to generate the string of data shared by both dictionaries
  # argument: a dictionary array (flat list); returns the property string
  my (@v) = @_;
  my $entry = "lspace:" . $v[LSPACE] . " rspace:" . $v[RSPACE];
  # minsize is only written when it differs from the default
  if ($v[MINSIZE] ne "1") { $entry = "$entry minsize:" . $v[MINSIZE]; }
  foreach my $flag (@COMMON_FLAGS) {
    my ($index, $name) = @{$flag};
    if ($v[$index]) { $entry = "$entry $name"; }
  }
  return $entry;
}

sub completeCommon {
  # helper to add key and private data to generateCommon
  # arguments:
  #   $entry - property string produced by generateCommon
  #   $key   - dictionary key
  #   $v_moz - reference to our array for this key ([] if we have none)
  #   $v_wg  - reference to the WG array for this key
  # The arrays are passed by reference: two arrays in a flat argument list
  # cannot be told apart by the callee.
  # returns the complete dictionary line, newline included
  my ($entry, $key, $v_moz, $v_wg) = @_;

  if ($v_moz->[DIRECTION]) {
    $entry = "$entry direction:" . $v_moz->[DIRECTION];
  }
  if ($v_moz->[INTEGRAL]) { $entry = "$entry integral"; }
  # generateCommon may already have written "mirrorable"; don't repeat it
  if ($v_moz->[MIRRORABLE] && $entry !~ m/\bmirrorable\b/) {
    $entry = "$entry mirrorable";
  }

  $entry = "$key = $entry";

  if ($v_moz->[DESCRIPTION]) {
    # keep our previous comment
    $entry = "$entry # " . $v_moz->[DESCRIPTION];
  } else {
    # otherwise use the description given by the WG
    my $description = $v_wg->[DESCRIPTION];
    $entry = "$entry # " . (defined($description) ? $description : "");
  }

  $entry = "$entry\n";
  return $entry;
}

sub generateEntry {
  # helper function to generate an entry of our operator dictionary
  # arguments: the key followed by its dictionary array (flat list)
  my ($key, @moz) = @_;
  my $entry = generateCommon(@moz);
  return completeCommon($entry, $key, \@moz, \@moz);
}