intl/uconv/tools/cp936tocdx.pl

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/tools/cp936tocdx.pl	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,133 @@
     1.4 +#!/usr/local/bin/perl
     1.5 +# -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
     1.6 +#
     1.7 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.8 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.9 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    1.10 +$rowwidth = ((0xff - 0x80)+(0x7f - 0x40));
    1.11 +sub cp936tonum()
    1.12 +{
    1.13 +   my($cp936) = (@_);
    1.14 +   my($first,$second,$jnum);
    1.15 +   $first = hex(substr($cp936,2,2));
    1.16 +   $second = hex(substr($cp936,4,2));
    1.17 +   $jnum = ($first - 0x81 ) * $rowwidth;
    1.18 +   if($second >= 0x80)
    1.19 +   {
    1.20 +       $jnum += $second - 0x80 + (0x7f-0x40);
    1.21 +   }
    1.22 +   else
    1.23 +   {
    1.24 +       $jnum += $second - 0x40;
    1.25 +   }
    1.26 +   return $jnum;
    1.27 +}
    1.28 +
    1.29 +@map = {};
    1.30 +sub readtable()
    1.31 +{
    1.32 +open(CP936, "<gbkcommon.txt") || die "cannot open gbkcommon.txt";
    1.33 +while(<CP936>)
    1.34 +{
    1.35 +   if(! /^#/) {
    1.36 +        chop();
    1.37 +        ($j, $u, $r) = split(/\t/,$_);
    1.38 +        if(length($j) > 4)
    1.39 +        {
    1.40 +        $n = &cp936tonum($j);
    1.41 +        $map{$n} = $u;
    1.42 +        }
    1.43 +   } 
    1.44 +}
    1.45 +}
    1.46 +
    1.47 +
    1.48 +sub printtable()
    1.49 +{
    1.50 +  for($i=0;$i<126;$i++)
    1.51 +  {
    1.52 +     printf ( "/* 0x%2XXX */\n", ( $i + 0x81));
    1.53 +     for($j=0;$j<(0x7f-0x40);$j++)
    1.54 +     {
    1.55 +         if("" eq ($map{($i * $rowwidth + $j)}))
    1.56 +         {
    1.57 +            printf "0xFFFD,"
    1.58 +         } 
    1.59 +         else 
    1.60 +         {   
    1.61 +            printf $map{($i * $rowwidth + $j)} . ",";
    1.62 +         }
    1.63 +         if( 0 == (($j + 1) % 8))
    1.64 +         {
    1.65 +            printf "/* 0x%2X%1X%1X*/\n", $i+0x81, 4+($j/16), (7==($j%16))?0:8;
    1.66 +         }
    1.67 +     }
    1.68 +     
    1.69 +	 print "0xFFFD,";
    1.70 +
    1.71 +     printf "/* 0x%2X%1X%1X*/\n", $i+0x81, 4+($j/16),(7==($j%16))?0:8;
    1.72 +     for($j=0;$j < (0xff-0x80);$j++)
    1.73 +     {
    1.74 +         if("" eq ($map{($i * $rowwidth + $j + 0x3f)}))		# user defined chars map to 0xFFFD
    1.75 +         {
    1.76 +
    1.77 +			if ( ( $i == 125 ) and ( $j == (0xff - 0x80 - 1 )))
    1.78 +			{
    1.79 +				printf "0xFFFD";							#has no ',' followed last item
    1.80 +			}
    1.81 +			else
    1.82 +			{
    1.83 +				printf "0xFFFD,";
    1.84 +			}
    1.85 +         } 
    1.86 +		 else
    1.87 +		 {
    1.88 +			if ( ( $i == 125 ) and ( $j == (0xff - 0x80 - 1 )))
    1.89 +			{
    1.90 +				printf $map{($i * $rowwidth + $j + 0x3f)};	#has no ',' followed last item
    1.91 +			}
    1.92 +			else
    1.93 +			{
    1.94 +				printf $map{($i * $rowwidth + $j + 0x3f)} . ",";
    1.95 +			}
    1.96 +		 }
    1.97 +		  	
    1.98 +         if( 0 == (($j + 1) % 8))
    1.99 +         {
   1.100 +            printf "/* 0x%2X%1X%1X*/\n", $i+0x81, 8+($j/16), (7==($j%16))?0:8;
   1.101 +         }
   1.102 +     }
   1.103 +     printf "       /* 0x%2X%1X%1X*/\n", $i+0x81, 8+($j/16),(7==($j%16))?0:8;
   1.104 +  }
   1.105 +}
   1.106 +sub printnpl()
   1.107 +{
   1.108 +$npl = <<END_OF_NPL;
   1.109 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   1.110 +/* This Source Code Form is subject to the terms of the Mozilla Public
   1.111 + * License, v. 2.0. If a copy of the MPL was not distributed with this
   1.112 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   1.113 +END_OF_NPL
   1.114 +print $npl;
   1.115 +}
   1.116 +sub printdontmodify()
   1.117 +{
   1.118 +$dont_modify = <<END_OF_DONT_MODIFY;
   1.119 +/*
   1.120 +  This file is generated by mozilla/intl/uconv/tools/cp936tocdx.pl
   1.121 +  Please do not modify this file by hand
   1.122 +  Instead, you should download CP936.TXT from
   1.123 +  http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
   1.124 +  and put under mozilla/intl/uconv/toools
   1.125 +  and run perl cp936tocdx.pl > ../ucvcn/cp936map.h
   1.126 +  If you have question, mailto:ftan\@netscape.com
   1.127 + */
   1.128 +END_OF_DONT_MODIFY
   1.129 +print $dont_modify;
   1.130 +}
   1.131 +
   1.132 +&readtable();
   1.133 +&printnpl();
   1.134 +&printdontmodify();
   1.135 +&printtable();
   1.136 +

mercurial