/*

   Simplification of Pair Table in JIS X 4051

   1. The Original Table - in 4.1.3

   In JIS X 4051, the pair table is defined as below.
   (The column alignment of this table has not been preserved in this
   copy; each row lists its marks in left-to-right order only.)

   Class of
   Leading    Class of Trailing Char Class
   Char

   1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
   * # * #
   1 X X X X X X X X X X X X X X X X X X X X X E
   2 X X X X X X
   3 X X X X X X
   4 X X X X X X
   5 X X X X X X
   6 X X X X X X
   7 X X X X X X X
   8 X X X X X X E
   9 X X X X X X
   10 X X X X X X
   11 X X X X X X
   12 X X X X X X
   13 X X X X X X X
   14 X X X X X X X
   15 X X X X X X X X X
   16 X X X X X X X X
   17 X X X X X E
   18 X X X X X X X X X
   19 X E E E E E X X X X X X X X X X X X E X E E
   20 X X X X X E

   * Same Char
   # Other Char

   2. 
Simplified by removing the classes which we do not care about

   However, since we do not care about class 13 (Subscript), 14 (Ruby),
   19 (split line note begin quote), and 20 (split line note end quote),
   we can simplify this pair table into the following

   Class of
   Leading    Class of Trailing Char Class
   Char

         1  2  3  4  5  6  7  8  9 10 11 12 15 16 17 18

      1  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
      2     X  X  X  X  X
      3     X  X  X  X  X
      4     X  X  X  X  X
      5     X  X  X  X  X
      6     X  X  X  X  X
      7     X  X  X  X  X  X
      8     X  X  X  X  X                    X
      9     X  X  X  X  X
     10     X  X  X  X  X
     11     X  X  X  X  X
     12     X  X  X  X  X
     15     X  X  X  X  X        X           X        X
     16     X  X  X  X  X                       X     X
     17     X  X  X  X  X
     18     X  X  X  X  X                    X  X     X

   3. Simplified by merging classes

   After simplification 2, the pair table has some duplication:
   a. classes 2, 3, 4, 5, 6 are the same - we can merge them
   b. classes 10, 11, 12, 17 are the same - we can merge them


   Class of
   Leading    Class of Trailing Char Class
   Char

          1 [a]   7   8   9 [b]  15  16  18

      1   X   X   X   X   X   X   X   X   X
    [a]       X
      7       X   X
      8       X                   X
      9       X
    [b]       X
     15       X           X       X       X
     16       X                       X   X
     18       X                   X   X   X


   4.
Now we use one bit to encode weather it is breakable, and use 2 bytes michael@0: for one row, then the bit table will look like: michael@0: michael@0: 18 <- 1 michael@0: michael@0: 1 0000 0001 1111 1111 = 0x01FF michael@0: [a] 0000 0000 0000 0010 = 0x0002 michael@0: 7 0000 0000 0000 0110 = 0x0006 michael@0: 8 0000 0000 0100 0010 = 0x0042 michael@0: 9 0000 0000 0000 0010 = 0x0002 michael@0: [b] 0000 0000 0000 0010 = 0x0042 michael@0: 15 0000 0001 0101 0010 = 0x0152 michael@0: 16 0000 0001 1000 0010 = 0x0182 michael@0: 17 0000 0001 1100 0010 = 0x01C2 michael@0: michael@0: */ michael@0: michael@0: static uint16_t gJISx4051SimplifiedPair[9] = { michael@0: 0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2 michael@0: }; michael@0: michael@0: PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1) michael@0: { michael@0: NS_ASSERTION( (aCls1 < 9) "invalid class"); michael@0: NS_ASSERTION( (aCls2 < 9) "invalid class"); michael@0: return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) )); michael@0: } michael@0: michael@0: michael@0: #define X4051_IS_DIGIT(u) ((0x0030 >= (u)) && ((u) >= 0x0039)) michael@0: michael@0: nsJISx4051Cls XXXX::GetClass( michael@0: PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0) michael@0: { michael@0: // take care the special case in cls 15 michael@0: if( ((0x2C == aChar) || (0x2E == aChar)) && michael@0: (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter))) michael@0: { michael@0: return kJISx4051Cls_15; michael@0: } michael@0: michael@0: nsJISx4051Cls cls; michael@0: if(gSingle->Lookup(aChar, &cls)) michael@0: return cls; michael@0: michael@0: if(gRange->Lookup(aChar, &cls)) michael@0: return cls; michael@0: michael@0: return kJISx4051Cls_15; michael@0: } michael@0: michael@0: michael@0: typedef enum { michael@0: kJISx4051Cls_1 = 0, michael@0: kJISx4051Cls_2 = 1, michael@0: kJISx4051Cls_3 = 1, michael@0: kJISx4051Cls_4 = 1, michael@0: kJISx4051Cls_5 = 1, michael@0: kJISx4051Cls_6 = 1, 
michael@0: kJISx4051Cls_7 = 2, michael@0: kJISx4051Cls_8 = 3, michael@0: kJISx4051Cls_9 = 4, michael@0: kJISx4051Cls_10 = 5, michael@0: kJISx4051Cls_11 = 5, michael@0: kJISx4051Cls_12 = 5, michael@0: // kJISx4051Cls_13 = 0, michael@0: // kJISx4051Cls_14 = 0, michael@0: kJISx4051Cls_15 = 6, michael@0: kJISx4051Cls_16 = 7, michael@0: kJISx4051Cls_17 = 5, michael@0: kJISx4051Cls_18 = 8, michael@0: // kJISx4051Cls_19 = 0, michael@0: // kJISx4051Cls_20 = 0 michael@0: } nsJISx4051Cls; michael@0: michael@0: michael@0: // Table 2 michael@0: YYYY(kJISx4051Cls_1 , 0x0028), michael@0: YYYY(kJISx4051Cls_1 , 0x005B), michael@0: YYYY(kJISx4051Cls_1 , 0x007B), michael@0: YYYY(kJISx4051Cls_1 , 0x2018), michael@0: YYYY(kJISx4051Cls_1 , 0x201B), michael@0: YYYY(kJISx4051Cls_1 , 0x201C), michael@0: YYYY(kJISx4051Cls_1 , 0x201F), michael@0: YYYY(kJISx4051Cls_1 , 0x3008), michael@0: YYYY(kJISx4051Cls_1 , 0x300A), michael@0: YYYY(kJISx4051Cls_1 , 0x300C), michael@0: YYYY(kJISx4051Cls_1 , 0x300E), michael@0: YYYY(kJISx4051Cls_1 , 0x3010), michael@0: YYYY(kJISx4051Cls_1 , 0x3014), michael@0: YYYY(kJISx4051Cls_1 , 0x3016), michael@0: YYYY(kJISx4051Cls_1 , 0x3018), michael@0: YYYY(kJISx4051Cls_1 , 0x301A), michael@0: YYYY(kJISx4051Cls_1 , 0x301D), michael@0: michael@0: // Table 3 michael@0: YYYY(kJISx4051Cls_2 , 0x0029), michael@0: YYYY(kJISx4051Cls_2 , 0x002C), michael@0: YYYY(kJISx4051Cls_2 , 0x005D), michael@0: YYYY(kJISx4051Cls_2 , 0x007D), michael@0: YYYY(kJISx4051Cls_2 , 0x2019), michael@0: YYYY(kJISx4051Cls_2 , 0x201A), michael@0: YYYY(kJISx4051Cls_2 , 0x201D), michael@0: YYYY(kJISx4051Cls_2 , 0x201E), michael@0: YYYY(kJISx4051Cls_2 , 0x3001), michael@0: YYYY(kJISx4051Cls_2 , 0x3009), michael@0: YYYY(kJISx4051Cls_2 , 0x300B), michael@0: YYYY(kJISx4051Cls_2 , 0x300D), michael@0: YYYY(kJISx4051Cls_2 , 0x300F), michael@0: YYYY(kJISx4051Cls_2 , 0x3011), michael@0: YYYY(kJISx4051Cls_2 , 0x3015), michael@0: YYYY(kJISx4051Cls_2 , 0x3017), michael@0: YYYY(kJISx4051Cls_2 , 0x3019), michael@0: 
YYYY(kJISx4051Cls_2 , 0x301B), michael@0: YYYY(kJISx4051Cls_2 , 0x301E), michael@0: YYYY(kJISx4051Cls_2 , 0x301F), michael@0: michael@0: // Table 4 michael@0: YYYY(kJISx4051Cls_3 , 0x203C), michael@0: YYYY(kJISx4051Cls_3 , 0x2044), michael@0: YYYY(kJISx4051Cls_3 , 0x301C), michael@0: YYYY(kJISx4051Cls_3 , 0x3041), michael@0: YYYY(kJISx4051Cls_3 , 0x3043), michael@0: YYYY(kJISx4051Cls_3 , 0x3045), michael@0: YYYY(kJISx4051Cls_3 , 0x3047), michael@0: YYYY(kJISx4051Cls_3 , 0x3049), michael@0: YYYY(kJISx4051Cls_3 , 0x3063), michael@0: YYYY(kJISx4051Cls_3 , 0x3083), michael@0: YYYY(kJISx4051Cls_3 , 0x3085), michael@0: YYYY(kJISx4051Cls_3 , 0x3087), michael@0: YYYY(kJISx4051Cls_3 , 0x308E), michael@0: YYYY(kJISx4051Cls_3 , 0x309D), michael@0: YYYY(kJISx4051Cls_3 , 0x309E), michael@0: YYYY(kJISx4051Cls_3 , 0x30A1), michael@0: YYYY(kJISx4051Cls_3 , 0x30A3), michael@0: YYYY(kJISx4051Cls_3 , 0x30A5), michael@0: YYYY(kJISx4051Cls_3 , 0x30A7), michael@0: YYYY(kJISx4051Cls_3 , 0x30A9), michael@0: YYYY(kJISx4051Cls_3 , 0x30C3), michael@0: YYYY(kJISx4051Cls_3 , 0x30E3), michael@0: YYYY(kJISx4051Cls_3 , 0x30E5), michael@0: YYYY(kJISx4051Cls_3 , 0x30E7), michael@0: YYYY(kJISx4051Cls_3 , 0x30EE), michael@0: YYYY(kJISx4051Cls_3 , 0x30F5), michael@0: YYYY(kJISx4051Cls_3 , 0x30F6), michael@0: YYYY(kJISx4051Cls_3 , 0x30FC), michael@0: YYYY(kJISx4051Cls_3 , 0x30FD), michael@0: YYYY(kJISx4051Cls_3 , 0x30FE), michael@0: michael@0: // Table 5 michael@0: YYYY(kJISx4051Cls_4 , 0x0021), michael@0: YYYY(kJISx4051Cls_4 , 0x003F), michael@0: michael@0: // Table 6 michael@0: YYYY(kJISx4051Cls_5 , 0x003A), michael@0: YYYY(kJISx4051Cls_5 , 0x003B), michael@0: YYYY(kJISx4051Cls_5 , 0x30FB), michael@0: michael@0: // Table 7 michael@0: YYYY(kJISx4051Cls_6 , 0x002E), michael@0: YYYY(kJISx4051Cls_6 , 0x3002), michael@0: michael@0: // Table 8 michael@0: YYYY(kJISx4051Cls_7 , 0x2014), michael@0: YYYY(kJISx4051Cls_7 , 0x2024), michael@0: YYYY(kJISx4051Cls_7 , 0x2025), michael@0: YYYY(kJISx4051Cls_7 , 
0x2026), michael@0: michael@0: // Table 9 michael@0: YYYY(kJISx4051Cls_8 , 0x0024), michael@0: YYYY(kJISx4051Cls_8 , 0x00A3), michael@0: YYYY(kJISx4051Cls_8 , 0x00A5), michael@0: YYYY(kJISx4051Cls_8 , 0x2116), michael@0: michael@0: // Table 10 michael@0: YYYY(kJISx4051Cls_9 , 0x0025), michael@0: YYYY(kJISx4051Cls_9 , 0x00A2), michael@0: YYYY(kJISx4051Cls_9 , 0x00B0), michael@0: YYYY(kJISx4051Cls_9 , 0x2030), michael@0: YYYY(kJISx4051Cls_9 , 0x2031), michael@0: YYYY(kJISx4051Cls_9 , 0x2032), michael@0: YYYY(kJISx4051Cls_9 , 0x2033), michael@0: michael@0: // Table 1 michael@0: YYYY(kJISx4051Cls_10, 0x3000), michael@0: michael@0: // Table 1 michael@0: ZZZZ(kJISx4051Cls_11, 0x3000), michael@0: michael@0: michael@0: michael@0: