intl/lwbrk/tools/anzx4051.pl

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 #!/usr/bin/perl 
     2 #
     3 # This Source Code Form is subject to the terms of the Mozilla Public
     4 # License, v. 2.0. If a copy of the MPL was not distributed with this
     5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     7 ######################################################################
     8 #
     9 # Initial global variable
    10 #
    11 ######################################################################
    12 %utot = ();
    13 $ui=0;
    14 $li=0;
    16 ######################################################################
    17 #
    18 # Open the unicode database file
    19 #
    20 ######################################################################
    21 open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt") 
    22    || die "cannot find UnicodeData-Latest.txt";
    24 ######################################################################
    25 #
    26 # Open the JIS X 4051 Class file
    27 #
    28 ######################################################################
    29 open ( CLASS , "< jisx4051class.txt") 
    30    || die "cannot find jisx4051class.txt";
    32 ######################################################################
    33 #
    34 # Open the JIS X 4051 Class simplified mapping
    35 #
    36 ######################################################################
    37 open ( SIMP , "< jisx4051simp.txt") 
    38    || die "cannot find jisx4051simp.txt";
    40 ######################################################################
    41 #
    42 # Open the output file
    43 #
    44 ######################################################################
    45 open ( OUT , "> anzx4051.html") 
    46   || die "cannot open output anzx4051.html file";
    48 ######################################################################
    49 #
    50 # Open the output file
    51 #
    52 ######################################################################
    53 open ( HEADER , "> ../src/jisx4051class.h") 
    54   || die "cannot open output ../src/jisx4051class.h file";
    56 ######################################################################
    57 #
    58 # Generate license and header
    59 #
    60 ######################################################################
    61 $hthmlheader = <<END_OF_HTML;
    62 <!-- This Source Code Form is subject to the terms of the Mozilla Public
    63    - License, v. 2.0. If a copy of the MPL was not distributed with this
    64    - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
    66 <HTML>
    67 <HEAD>
    68 <TITLE>
    69 Analysis of JIS X 4051 to Unicode General Category Mapping
    70 </TITLE>
    71 </HEAD>
    72 <BODY>
    73 <H1>
    74 Analysis of JIS X 4051 to Unicode General Category Mapping
    75 </H1>
    76 END_OF_HTML
    77 print OUT $hthmlheader;
    79 ######################################################################
    80 #
    81 # Generate license and header
    82 #
    83 ######################################################################
    84 $npl = <<END_OF_NPL;
    85 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    86 /* This Source Code Form is subject to the terms of the Mozilla Public
    87  * License, v. 2.0. If a copy of the MPL was not distributed with this
    88  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    89 /* 
    90     DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
    91     mozilla/intl/lwbrk/tools/anzx4051.pl
    92  */
    93 END_OF_NPL
    94 print HEADER $npl;
    96 %occ = ();
    97 %gcat = ();
    98 %dcat = ();
    99 %simp = ();
   100 %gcount = ();
   101 %dcount = ();
   102 %sccount = ();
   103 %rangecount = ();
   105 ######################################################################
   106 #
   107 # Process the file line by line
   108 #
   109 ######################################################################
   110 while(<UNICODATA>) {
   111    chop;
   112    ######################################################################
   113    #
   114    # Get value from fields
   115    #
   116    ######################################################################
   117    @f = split(/;/ , $_); 
   118    $c = $f[0];   # The unicode value
   119    $g = $f[2]; 
   120    $d = substr($g, 0, 1);
   122    $gcat{$c} = $g;
   123    $dcat{$c} = $d;
   124    $gcount{$g}++;
   125    $dcount{$d}++;
   126 }
   127 close(UNIDATA);
   129 while(<SIMP>) {
   130    chop;
   131    ######################################################################
   132    #
   133    # Get value from fields
   134    #
   135    ######################################################################
   136    @f = split(/;/ , $_); 
   138    $simp{$f[0]} = $f[1];
   139    $sccount{$f[1]}++;
   140 }
   141 close(SIMP);
   143 sub GetClass{
   144   my ($u) = @_;
   145   my $hex = DecToHex($u);
   146   $g = $gcat{$hex};
   147   if($g ne "") {
   148     return $g;
   149   } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 )  ) {
   150     return "Han";
   151   } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 )  ) {
   152     return "Lo";
   153   } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f )  ) {
   154     return "Cs";
   155   } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff )  ) {
   156     return "Cs";
   157   } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff )  ) {
   158     return "Cs";
   159   } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff )  ) {
   160     return "Co";
   161   } else {
   162     printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
   163   }
   164 }
   165 sub GetDClass{
   166   my ($u) = @_;
   167   my $hex = DecToHex($u);
   168   $g = $dcat{$hex};
   169   if($g ne "") {
   170     return $g;
   171   } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 )  ) {
   172     return "Han";
   173   } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 )  ) {
   174     return "L";
   175   } elsif (( 0xd800 <= $u) && ( $u <= 0xdb7f )  ) {
   176     return "C";
   177   } elsif (( 0xdb80 <= $u) && ( $u <= 0xdbff )  ) {
   178     return "C";
   179   } elsif (( 0xdc00 <= $u) && ( $u <= 0xdfff )  ) {
   180     return "C";
   181   } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff )  ) {
   182     return "C";
   183   } else {
   184     printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
   185   }
   186 }
   187 sub DecToHex{
   188      my ($d) = @_;
   189      return sprintf("%04X", $d); 
   190 }
   191 %gtotal = ();
   192 %dtotal = ();
   193 while(<CLASS>) {
   194    chop;
   195    ######################################################################
   196    #
   197    # Get value from fields
   198    #
   199    ######################################################################
   200    @f = split(/;/ , $_); 
   202    if( substr($f[2], 0, 1) ne "a")
   203    {
   204      $sc = $simp{$f[2]};
   205      $l = hex($f[0]);
   206      if($f[1] eq "")
   207      {
   208        $h = $l;
   209      } else {
   210        $h = hex($f[1]);
   211      }
   212      for($k = $l; $k <= $h ; $k++)
   213      {
   214        if( exists($occ{$k}))
   215        {
   216           #  printf "WARNING !! Conflict defination!!! U+%s -> [%s] [%s | %s]\n", 
   217           #         DecToHex($k),  $occ{$k} , $f[2] , $sc;
   218        }
   219        else
   220        {
   221            $occ{$k} = $sc . " | " . $f[2];
   222            $gclass = GetClass($k); 
   223            $dclass = GetDClass($k);
   224            $gtotal{$sc . $gclass}++;
   225            $dtotal{$sc . $dclass}++;
   226            $u = DecToHex($k);
   227            $rk = " " . substr($u,0,2) . ":" . $sc;
   228            $rangecount{$rk}++;
   229        }
   230      }
   231   }
   232 }
   234 #print %gtotal;
   235 #print %dtotal;
   237 sub printreport 
   238 {
   239     print OUT "<TABLE BORDER=3>\n";
   240     print OUT "<TR BGCOLOR=blue><TH><TH>\n";
   242     foreach $d (sort(keys %dcount)) {
   243        print OUT "<TD BGCOLOR=red>$d</TD>\n";
   244     }
   246     print OUT "<TD BGCOLOR=white>Total</TD>\n";
   247     foreach $g (sort(keys %gcount)) {
   248        print OUT "<TD BGCOLOR=yellow>$g</TD>\n";
   249     }
   250     print OUT "</TR>\n";
   251     foreach $sc (sort(keys %sccount)) {
   253        print OUT "<TR><TH>$sc<TH>\n";
   255        $total = 0; 
   256        foreach $d (sort (keys %dcount)) {
   257          $count = $dtotal{$sc . $d};
   258          $total += $count;
   259          print OUT "<TD>$count</TD>\n";
   260        }
   262        print OUT "<TD BGCOLOR=white>$total</TD>\n";
   264        foreach $g (sort(keys %gcount)) {
   265          $count = $gtotal{$sc . $g};
   266          print OUT "<TD>$count</TD>\n";
   267        }
   270        print OUT "</TR>\n";
   271     }
   272     print OUT "</TABLE>\n";
   275     print OUT "<TABLE BORDER=3>\n";
   276     print OUT "<TR BGCOLOR=blue><TH><TH>\n";
   278     foreach $sc (sort(keys %sccount)) 
   279     {
   280        print OUT "<TD BGCOLOR=red>$sc</TD>\n";
   281     }
   283     print OUT "</TR>\n";
   286     for($rr = 0; $rr < 0x4f; $rr++)
   287     {
   288        $empty = 0;
   289        $r = sprintf("%02X" , $rr) ;
   290        $tmp = "<TR><TH>" . $r . "<TH>\n";
   292        foreach $sc (sort(keys %sccount)) {
   293          $count = $rangecount{ " " .$r . ":" .$sc};
   294          $tmp .= sprintf("<TD>%s</TD>\n", $count);
   295          $empty += $count;
   296        }
   298        $tmp .=  "</TR>\n";
   300        if($empty ne 0) 
   301        {
   302           print OUT $tmp;
   303        }
   304     }
   305     print OUT "</TABLE>\n";
   307 }
   308 printreport();
   310 sub printarray
   311 {
   312    my($r, $def) = @_;
   313 printf "[%s || %s]\n", $r, $def;
   314    $k = hex($r) * 256;
   315    printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;
   316    for($i = 0 ; $i < 256; $i+= 8)
   317    {  
   318       for($j = 7 ; $j >= 0; $j-- )
   319       {  
   320           $v = $k + $i + $j;
   321           if( exists($occ{$v})) 
   322 	  {
   323              $p = substr($occ{$v}, 1,1);
   324           } else {
   325              $p = $def;
   326           }
   328           if($j eq 7 ) 
   329           {
   330              printf HEADER "0x%s" , $p;
   331           } else {
   332              printf HEADER "%s", $p ;
   333           }
   334       }
   335       printf HEADER ", // U+%04X - U+%04X\n", $k + $i ,( $k + $i + 7);
   336    }
   337    print HEADER "};\n\n";
   338 }
   339 printarray("00", "7");
   340 printarray("20", "7");
   341 printarray("21", "7");
   342 printarray("30", "5");
   343 printarray("0E", "8");
   344 printarray("17", "7");
   346 #print %rangecount;
   348 ######################################################################
   349 #
   350 # Close files
   351 #
   352 ######################################################################
   353 close(HEADER);
   354 close(CLASS);
   355 close(OUT);

mercurial