intl/uconv/tools/parse-mozilla-encoding-table.pl

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/tools/parse-mozilla-encoding-table.pl	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,92 @@
     1.4 +#!/usr/bin/perl
     1.5 +# parse-mozilla-encoding-table.pl, version 0.6
     1.6 +#
     1.7 +# Script to deassemble existing Mozilla *.uf or *.ut files
     1.8 +# back to source conversion tables.
     1.9 +# by Anthony Fok <anthony@thizlinux.com>, ThizLinux Laboratory Ltd., 2002/11/27
    1.10 +# License: GNU General Public License, version 2 or newer
    1.11 +# 
    1.12 +# Used for verifying HKSCS-1999 hkscs.uf and hkscs.ut so that I can make
    1.13 +# new ones for HKSCS-2001.  This script is quick-and-dirty and not very
    1.14 +# robust, so if the debug output of fromu/tou ever changes, this script
    1.15 +# will need to be modified too.  :-)
    1.16 +
    1.17 +my %data = ();
    1.18 +my $mappingPos = 0;
    1.19 +my $filename = shift;
    1.20 +my $mode;
    1.21 +if ($filename =~ /\.(ut|uf)$/) {
    1.22 +  print $filename, "\n";
    1.23 +  $mode = $1;
    1.24 +} else {
    1.25 +  die;
    1.26 +}
    1.27 +
    1.28 +open(INFILE, "<$filename") or die;
    1.29 +
    1.30 +# Quick-and-dirty routine to populate %data
    1.31 +while (<INFILE>) {
    1.32 +  if (/^Begin of Item ([[:xdigit:]]+)/) {
    1.33 +    die if defined($itemId) and hex($itemId) + 1 != hex($1);
    1.34 +    $itemId = $1;
    1.35 +    <INFILE> =~ /Format ([012])/ or die;
    1.36 +    $format = $1;
    1.37 +    <INFILE> =~ /srcBegin = ([[:xdigit:]]+)/ or die;
    1.38 +    $srcBegin = $1;
    1.39 +    
    1.40 +    if ($format == 0) {		# Range
    1.41 +      <INFILE> =~ /srcEnd = ([[:xdigit:]]+)/ or die;
    1.42 +      $srcEnd = $1;
    1.43 +      <INFILE> =~ /destBegin = ([[:xdigit:]]+)/ or die;
    1.44 +      $destBegin = $1;
    1.45 +
    1.46 +      for ($i = hex($srcBegin); $i <= hex($srcEnd); $i++) {
    1.47 +        $data{sprintf("%04X",$i)} = sprintf("%04X",
    1.48 +	    hex($destBegin) + $i - hex($srcBegin));
    1.49 +      }
    1.50 +
    1.51 +      <INFILE> =~ /^End of Item $itemId\s*$/ or die;
    1.52 +    }
    1.53 +    elsif ($format == 1) {	# Mapping
    1.54 +      <INFILE> =~ /srcEnd = ([[:xdigit:]]+)/ or die;
    1.55 +      $srcEnd = $1;
    1.56 +      <INFILE> =~ /mappingOffset = ([[:xdigit:]]+)/ or die;
    1.57 +      $mappingOffset = hex($1);
    1.58 +      die unless $mappingOffset == $mappingPos;
    1.59 +      <INFILE> =~ /Mapping  =\s*$/ or die;
    1.60 +      until ($_ = <INFILE>, /^End of Item/) {
    1.61 +        chop;
    1.62 +        for $i (split ' ') {
    1.63 +	  $key = sprintf("%04X", hex($srcBegin) - $mappingOffset + $mappingPos++);
    1.64 +	  next if $i eq "FFFD";
    1.65 +	  if (defined($data{$key})) {
    1.66 +	    print "Error: doubly defined. $key was $data{$key}, and now $i.\n";
    1.67 +	  } else {
    1.68 +	    $data{$key} = $i;
    1.69 +	  }
    1.70 +	}
    1.71 +      }
    1.72 +      die unless $mappingPos - $mappingOffset == hex($srcEnd) - hex($srcBegin) + 1;
    1.73 +      /^End of Item $itemId\s*$/ or die;
    1.74 +    }
    1.75 +    else {			# Single ($format == 2)
    1.76 +      <INFILE> =~ /destBegin = ([[:xdigit:]]+)/ or die;
    1.77 +      $destBegin = $1;
    1.78 +      $data{$srcBegin} = $destBegin;
    1.79 +      <INFILE> =~ /^End of Item $itemId\s*$/ or die;
    1.80 +    }
    1.81 +  }
    1.82 +}
    1.83 +
    1.84 +# Generate conversion table
    1.85 +for $key (sort keys %data) {
    1.86 +  if ($mode eq "ut") {
    1.87 +    print "0x$key\t0x$data{$key}\n";
    1.88 +  } elsif ($mode eq "uf") {
    1.89 +    print "0x$data{$key}\t0x$key\n";
    1.90 +  } else {
    1.91 +    die;
    1.92 +  }
    1.93 +}
    1.94 +
    1.95 +close INFILE;

mercurial