intl/lwbrk/src/jisx4051pairtable.txt

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     4 /* 
     6    Simplification of Pair Table in JIS X 4051
     8    1. The Original Table - in 4.1.3
    10    In JIS X 4051, the pair table is defined as below
    12    Class of
    13    Leading    Class of Trailing Char Class
    14    Char        
    16               1  2  3  4  5  6  7  8  9 10 11 12 13 13 14 14 15 16 17 18 19 20
    17                                                  *  #  *  #
    18         1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  E
    19         2        X  X  X  X  X                                               X
    20         3        X  X  X  X  X                                               X
    21         4        X  X  X  X  X                                               X
    22         5        X  X  X  X  X                                               X
    23         6        X  X  X  X  X                                               X
    24         7        X  X  X  X  X  X                                            X 
    25         8        X  X  X  X  X                                X              E 
    26         9        X  X  X  X  X                                               X
    27        10        X  X  X  X  X                                               X
    28        11        X  X  X  X  X                                               X
    29        12        X  X  X  X  X                                               X  
    30        13        X  X  X  X  X                    X                          X
    31        14        X  X  X  X  X                          X                    X
    32        15        X  X  X  X  X        X                       X        X     X 
    33        16        X  X  X  X  X                                   X     X     X
    34        17        X  X  X  X  X                                               E 
    35        18        X  X  X  X  X                                X  X     X     X 
    36        19     X  E  E  E  E  E  X  X  X  X  X  X  X  X  X  X  X  X  E  X  E  E
    37        20        X  X  X  X  X                                               E
    39    * Same Char
    40    # Other Char
    42    2. Simplified by removing the classes which we do not care about
    44    However, since we do not care about classes 13 (Subscript), 14 (Ruby),
    45    19 (split line note begin quote), and 20 (split line note end quote),
    46    we can simplify this pair table into the following
    48    Class of
    49    Leading    Class of Trailing Char Class
    50    Char        
    52               1  2  3  4  5  6  7  8  9 10 11 12 15 16 17 18 
    54         1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
    55         2        X  X  X  X  X                             
    56         3        X  X  X  X  X                            
    57         4        X  X  X  X  X                           
    58         5        X  X  X  X  X                          
    59         6        X  X  X  X  X                         
    60         7        X  X  X  X  X  X                      
    61         8        X  X  X  X  X                    X    
    62         9        X  X  X  X  X                                   
    63        10        X  X  X  X  X                                  
    64        11        X  X  X  X  X                                 
    65        12        X  X  X  X  X                                
    66        15        X  X  X  X  X        X           X        X    
    67        16        X  X  X  X  X                       X     X    
    68        17        X  X  X  X  X                                  
    69        18        X  X  X  X  X                    X  X     X    
    71    3. Simplified by merging classes
    73    After the second simplification, the pair table has some duplication:
    74    a. classes 2, 3, 4, 5, 6 are the same - we can merge them
    75    b. classes 10, 11, 12, 17 are the same - we can merge them
    78    Class of
    79    Leading    Class of Trailing Char Class
    80    Char        
    82               1 [a] 7  8  9 [b]15 16 18 
    84         1     X  X  X  X  X  X  X  X  X
    85       [a]        X                             
    86         7        X  X                      
    87         8        X              X    
    88         9        X                                   
    89       [b]        X                                  
    90        15        X        X     X     X    
    91        16        X                 X  X    
    92        18        X              X  X  X    
    95    4. Now we use one bit to encode whether it is breakable, and use 2 bytes
    96       for each row; the bit table then looks like:
    98                  18    <-   1
   100        1  0000 0001 1111 1111  = 0x01FF
   101       [a] 0000 0000 0000 0010  = 0x0002
   102        7  0000 0000 0000 0110  = 0x0006
   103        8  0000 0000 0100 0010  = 0x0042
   104        9  0000 0000 0000 0010  = 0x0002
   105       [b] 0000 0000 0000 0010  = 0x0002
   106       15  0000 0001 0101 0010  = 0x0152
   107       16  0000 0001 1000 0010  = 0x0182
   108       18  0000 0001 1100 0010  = 0x01C2
   110 */
   112 static uint16_t gJISx4051SimplifiedPair[9] = {
   113   0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0002, 0x0152, 0x0182, 0x01C2
   114 };
   116 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls2)
   117 {
   118   NS_ASSERTION( (aCls1 < 9), "invalid class");
   119   NS_ASSERTION( (aCls2 < 9), "invalid class");
   120   return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
   121 }
   124 #define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
   126 nsJISx4051Cls XXXX::GetClass(
   127    PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
   128 {
   129    // take care of the special case in cls 15
   130    if( ((0x2C == aChar) || (0x2E == aChar)) &&
   131        (X4051_IS_DIGIT(aBefore)) && (X4051_IS_DIGIT(aAfter)))
   132    {
   133      return kJISx4051Cls_15;
   134    }
   136    nsJISx4051Cls cls;
   137    if(gSingle->Lookup(aChar, &cls))
   138      return cls;
   140    if(gRange->Lookup(aChar, &cls))
   141      return cls;
   143    return kJISx4051Cls_15;
   144 }
   147 typedef enum {
   148   kJISx4051Cls_1 = 0,
   149   kJISx4051Cls_2 = 1,
   150   kJISx4051Cls_3 = 1,
   151   kJISx4051Cls_4 = 1,
   152   kJISx4051Cls_5 = 1,
   153   kJISx4051Cls_6 = 1,
   154   kJISx4051Cls_7 = 2,
   155   kJISx4051Cls_8 = 3,
   156   kJISx4051Cls_9 = 4,
   157   kJISx4051Cls_10 = 5,
   158   kJISx4051Cls_11 = 5,
   159   kJISx4051Cls_12 = 5,
   160   // kJISx4051Cls_13 = 0,
   161   // kJISx4051Cls_14 = 0,
   162   kJISx4051Cls_15 = 6,
   163   kJISx4051Cls_16 = 7,
   164   kJISx4051Cls_17 = 5,
   165   kJISx4051Cls_18 = 8,
   166   // kJISx4051Cls_19 = 0,
   167   // kJISx4051Cls_20 = 0
   168 } nsJISx4051Cls;
   171   // Table 2
   172   YYYY(kJISx4051Cls_1 , 0x0028),
   173   YYYY(kJISx4051Cls_1 , 0x005B),
   174   YYYY(kJISx4051Cls_1 , 0x007B),
   175   YYYY(kJISx4051Cls_1 , 0x2018),
   176   YYYY(kJISx4051Cls_1 , 0x201B),
   177   YYYY(kJISx4051Cls_1 , 0x201C),
   178   YYYY(kJISx4051Cls_1 , 0x201F),
   179   YYYY(kJISx4051Cls_1 , 0x3008),
   180   YYYY(kJISx4051Cls_1 , 0x300A),
   181   YYYY(kJISx4051Cls_1 , 0x300C),
   182   YYYY(kJISx4051Cls_1 , 0x300E),
   183   YYYY(kJISx4051Cls_1 , 0x3010),
   184   YYYY(kJISx4051Cls_1 , 0x3014),
   185   YYYY(kJISx4051Cls_1 , 0x3016),
   186   YYYY(kJISx4051Cls_1 , 0x3018),
   187   YYYY(kJISx4051Cls_1 , 0x301A),
   188   YYYY(kJISx4051Cls_1 , 0x301D),
   190   // Table 3
   191   YYYY(kJISx4051Cls_2 , 0x0029),
   192   YYYY(kJISx4051Cls_2 , 0x002C),
   193   YYYY(kJISx4051Cls_2 , 0x005D),
   194   YYYY(kJISx4051Cls_2 , 0x007D),
   195   YYYY(kJISx4051Cls_2 , 0x2019),
   196   YYYY(kJISx4051Cls_2 , 0x201A),
   197   YYYY(kJISx4051Cls_2 , 0x201D),
   198   YYYY(kJISx4051Cls_2 , 0x201E),
   199   YYYY(kJISx4051Cls_2 , 0x3001),
   200   YYYY(kJISx4051Cls_2 , 0x3009),
   201   YYYY(kJISx4051Cls_2 , 0x300B),
   202   YYYY(kJISx4051Cls_2 , 0x300D),
   203   YYYY(kJISx4051Cls_2 , 0x300F),
   204   YYYY(kJISx4051Cls_2 , 0x3011),
   205   YYYY(kJISx4051Cls_2 , 0x3015),
   206   YYYY(kJISx4051Cls_2 , 0x3017),
   207   YYYY(kJISx4051Cls_2 , 0x3019),
   208   YYYY(kJISx4051Cls_2 , 0x301B),
   209   YYYY(kJISx4051Cls_2 , 0x301E),
   210   YYYY(kJISx4051Cls_2 , 0x301F),
   212   // Table 4
   213   YYYY(kJISx4051Cls_3 , 0x203C),
   214   YYYY(kJISx4051Cls_3 , 0x2044),
   215   YYYY(kJISx4051Cls_3 , 0x301C),
   216   YYYY(kJISx4051Cls_3 , 0x3041),
   217   YYYY(kJISx4051Cls_3 , 0x3043),
   218   YYYY(kJISx4051Cls_3 , 0x3045),
   219   YYYY(kJISx4051Cls_3 , 0x3047),
   220   YYYY(kJISx4051Cls_3 , 0x3049),
   221   YYYY(kJISx4051Cls_3 , 0x3063),
   222   YYYY(kJISx4051Cls_3 , 0x3083),
   223   YYYY(kJISx4051Cls_3 , 0x3085),
   224   YYYY(kJISx4051Cls_3 , 0x3087),
   225   YYYY(kJISx4051Cls_3 , 0x308E),
   226   YYYY(kJISx4051Cls_3 , 0x309D),
   227   YYYY(kJISx4051Cls_3 , 0x309E),
   228   YYYY(kJISx4051Cls_3 , 0x30A1),
   229   YYYY(kJISx4051Cls_3 , 0x30A3),
   230   YYYY(kJISx4051Cls_3 , 0x30A5),
   231   YYYY(kJISx4051Cls_3 , 0x30A7),
   232   YYYY(kJISx4051Cls_3 , 0x30A9),
   233   YYYY(kJISx4051Cls_3 , 0x30C3),
   234   YYYY(kJISx4051Cls_3 , 0x30E3),
   235   YYYY(kJISx4051Cls_3 , 0x30E5),
   236   YYYY(kJISx4051Cls_3 , 0x30E7),
   237   YYYY(kJISx4051Cls_3 , 0x30EE),
   238   YYYY(kJISx4051Cls_3 , 0x30F5),
   239   YYYY(kJISx4051Cls_3 , 0x30F6),
   240   YYYY(kJISx4051Cls_3 , 0x30FC),
   241   YYYY(kJISx4051Cls_3 , 0x30FD),
   242   YYYY(kJISx4051Cls_3 , 0x30FE),
   244   // Table 5
   245   YYYY(kJISx4051Cls_4 , 0x0021),
   246   YYYY(kJISx4051Cls_4 , 0x003F),
   248   // Table 6
   249   YYYY(kJISx4051Cls_5 , 0x003A),
   250   YYYY(kJISx4051Cls_5 , 0x003B),
   251   YYYY(kJISx4051Cls_5 , 0x30FB),
   253   // Table 7
   254   YYYY(kJISx4051Cls_6 , 0x002E),
   255   YYYY(kJISx4051Cls_6 , 0x3002),
   257   // Table 8
   258   YYYY(kJISx4051Cls_7 , 0x2014),
   259   YYYY(kJISx4051Cls_7 , 0x2024),
   260   YYYY(kJISx4051Cls_7 , 0x2025),
   261   YYYY(kJISx4051Cls_7 , 0x2026),
   263   // Table 9
   264   YYYY(kJISx4051Cls_8 , 0x0024),
   265   YYYY(kJISx4051Cls_8 , 0x00A3),
   266   YYYY(kJISx4051Cls_8 , 0x00A5),
   267   YYYY(kJISx4051Cls_8 , 0x2116),
   269   // Table 10
   270   YYYY(kJISx4051Cls_9 , 0x0025),
   271   YYYY(kJISx4051Cls_9 , 0x00A2),
   272   YYYY(kJISx4051Cls_9 , 0x00B0),
   273   YYYY(kJISx4051Cls_9 , 0x2030),
   274   YYYY(kJISx4051Cls_9 , 0x2031),
   275   YYYY(kJISx4051Cls_9 , 0x2032),
   276   YYYY(kJISx4051Cls_9 , 0x2033),
   278   // Table 1
   279   YYYY(kJISx4051Cls_10, 0x3000),
   281   // Table 1
   282   ZZZZ(kJISx4051Cls_11, 0x3000),

mercurial