1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/lwbrk/src/jisx4051pairtable.txt Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,286 @@ 1.4 + 1.5 + 1.6 + 1.7 +/* 1.8 + 1.9 + Simplification of Pair Table in JIS X 4051 1.10 + 1.11 + 1. The Original Table - in 4.1.3 1.12 + 1.13 + In JIS X 4051, the pair table is defined as below 1.14 + 1.15 + Class of 1.16 + Leading Class of Trailing Char Class 1.17 + Char 1.18 + 1.19 + 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20 1.20 + * # * # 1.21 + 1 X X X X X X X X X X X X X X X X X X X X X E 1.22 + 2 X X X X X X 1.23 + 3 X X X X X X 1.24 + 4 X X X X X X 1.25 + 5 X X X X X X 1.26 + 6 X X X X X X 1.27 + 7 X X X X X X X 1.28 + 8 X X X X X X E 1.29 + 9 X X X X X X 1.30 + 10 X X X X X X 1.31 + 11 X X X X X X 1.32 + 12 X X X X X X 1.33 + 13 X X X X X X X 1.34 + 14 X X X X X X X 1.35 + 15 X X X X X X X X X 1.36 + 16 X X X X X X X X 1.37 + 17 X X X X X E 1.38 + 18 X X X X X X X X X 1.39 + 19 X E E E E E X X X X X X X X X X X X E X E E 1.40 + 20 X X X X X E 1.41 + 1.42 + * Same Char 1.43 + # Other Char 1.44 + 1.45 + 2. Simplified by removing the classes which we do not care about 1.46 + 1.47 + However, since we do not care about class 13(Subscript), 14(Ruby), 1.48 + 19(split line note begin quote), and 20(split line note end quote) 1.49 + we can simplify this pair table into the following 1.50 + 1.51 + Class of 1.52 + Leading Class of Trailing Char Class 1.53 + Char 1.54 + 1.55 + 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18 1.56 + 1.57 + 1 X X X X X X X X X X X X X X X X 1.58 + 2 X X X X X 1.59 + 3 X X X X X 1.60 + 4 X X X X X 1.61 + 5 X X X X X 1.62 + 6 X X X X X 1.63 + 7 X X X X X X 1.64 + 8 X X X X X X 1.65 + 9 X X X X X 1.66 + 10 X X X X X 1.67 + 11 X X X X X 1.68 + 12 X X X X X 1.69 + 15 X X X X X X X X 1.70 + 16 X X X X X X X 1.71 + 17 X X X X X 1.72 + 18 X X X X X X X X 1.73 + 1.74 + 3. Simplified by merging classes 1.75 + 1.76 + After the simplification in step 2, the pair table has some duplication 1.77 + a. 
class 2, 3, 4, 5, 6, are the same- we can merged them 1.78 + b. class 10, 11, 12, 17 are the same- we can merged them 1.79 + 1.80 + 1.81 + Class of 1.82 + Leading Class of Trailing Char Class 1.83 + Char 1.84 + 1.85 + 1 [a] 7 8 9 [b]15 16 18 1.86 + 1.87 + 1 X X X X X X X X X 1.88 + [a] X 1.89 + 7 X X 1.90 + 8 X X 1.91 + 9 X 1.92 + [b] X 1.93 + 15 X X X X 1.94 + 16 X X X 1.95 + 18 X X X X 1.96 + 1.97 + 1.98 + 4. Now we use one bit to encode weather it is breakable, and use 2 bytes 1.99 + for one row, then the bit table will look like: 1.100 + 1.101 + 18 <- 1 1.102 + 1.103 + 1 0000 0001 1111 1111 = 0x01FF 1.104 + [a] 0000 0000 0000 0010 = 0x0002 1.105 + 7 0000 0000 0000 0110 = 0x0006 1.106 + 8 0000 0000 0100 0010 = 0x0042 1.107 + 9 0000 0000 0000 0010 = 0x0002 1.108 + [b] 0000 0000 0000 0010 = 0x0042 1.109 + 15 0000 0001 0101 0010 = 0x0152 1.110 + 16 0000 0001 1000 0010 = 0x0182 1.111 + 17 0000 0001 1100 0010 = 0x01C2 1.112 + 1.113 +*/ 1.114 + 1.115 +static uint16_t gJISx4051SimplifiedPair[9] = { 1.116 + 0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2 1.117 +}; 1.118 + 1.119 +PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1) 1.120 +{ 1.121 + NS_ASSERTION( (aCls1 < 9) "invalid class"); 1.122 + NS_ASSERTION( (aCls2 < 9) "invalid class"); 1.123 + return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) )); 1.124 +} 1.125 + 1.126 + 1.127 +#define X4051_IS_DIGIT(u) ((0x0030 >= (u)) && ((u) >= 0x0039)) 1.128 + 1.129 +nsJISx4051Cls XXXX::GetClass( 1.130 + PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0) 1.131 +{ 1.132 + // take care the special case in cls 15 1.133 + if( ((0x2C == aChar) || (0x2E == aChar)) && 1.134 + (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter))) 1.135 + { 1.136 + return kJISx4051Cls_15; 1.137 + } 1.138 + 1.139 + nsJISx4051Cls cls; 1.140 + if(gSingle->Lookup(aChar, &cls)) 1.141 + return cls; 1.142 + 1.143 + if(gRange->Lookup(aChar, &cls)) 1.144 + return cls; 1.145 + 1.146 + return 
kJISx4051Cls_15; 1.147 +} 1.148 + 1.149 + 1.150 +typedef enum { 1.151 + kJISx4051Cls_1 = 0, 1.152 + kJISx4051Cls_2 = 1, 1.153 + kJISx4051Cls_3 = 1, 1.154 + kJISx4051Cls_4 = 1, 1.155 + kJISx4051Cls_5 = 1, 1.156 + kJISx4051Cls_6 = 1, 1.157 + kJISx4051Cls_7 = 2, 1.158 + kJISx4051Cls_8 = 3, 1.159 + kJISx4051Cls_9 = 4, 1.160 + kJISx4051Cls_10 = 5, 1.161 + kJISx4051Cls_11 = 5, 1.162 + kJISx4051Cls_12 = 5, 1.163 + // kJISx4051Cls_13 = 0, 1.164 + // kJISx4051Cls_14 = 0, 1.165 + kJISx4051Cls_15 = 6, 1.166 + kJISx4051Cls_16 = 7, 1.167 + kJISx4051Cls_17 = 5, 1.168 + kJISx4051Cls_18 = 8, 1.169 + // kJISx4051Cls_19 = 0, 1.170 + // kJISx4051Cls_20 = 0 1.171 +} nsJISx4051Cls; 1.172 + 1.173 + 1.174 + // Table 2 1.175 + YYYY(kJISx4051Cls_1 , 0x0028), 1.176 + YYYY(kJISx4051Cls_1 , 0x005B), 1.177 + YYYY(kJISx4051Cls_1 , 0x007B), 1.178 + YYYY(kJISx4051Cls_1 , 0x2018), 1.179 + YYYY(kJISx4051Cls_1 , 0x201B), 1.180 + YYYY(kJISx4051Cls_1 , 0x201C), 1.181 + YYYY(kJISx4051Cls_1 , 0x201F), 1.182 + YYYY(kJISx4051Cls_1 , 0x3008), 1.183 + YYYY(kJISx4051Cls_1 , 0x300A), 1.184 + YYYY(kJISx4051Cls_1 , 0x300C), 1.185 + YYYY(kJISx4051Cls_1 , 0x300E), 1.186 + YYYY(kJISx4051Cls_1 , 0x3010), 1.187 + YYYY(kJISx4051Cls_1 , 0x3014), 1.188 + YYYY(kJISx4051Cls_1 , 0x3016), 1.189 + YYYY(kJISx4051Cls_1 , 0x3018), 1.190 + YYYY(kJISx4051Cls_1 , 0x301A), 1.191 + YYYY(kJISx4051Cls_1 , 0x301D), 1.192 + 1.193 + // Table 3 1.194 + YYYY(kJISx4051Cls_2 , 0x0029), 1.195 + YYYY(kJISx4051Cls_2 , 0x002C), 1.196 + YYYY(kJISx4051Cls_2 , 0x005D), 1.197 + YYYY(kJISx4051Cls_2 , 0x007D), 1.198 + YYYY(kJISx4051Cls_2 , 0x2019), 1.199 + YYYY(kJISx4051Cls_2 , 0x201A), 1.200 + YYYY(kJISx4051Cls_2 , 0x201D), 1.201 + YYYY(kJISx4051Cls_2 , 0x201E), 1.202 + YYYY(kJISx4051Cls_2 , 0x3001), 1.203 + YYYY(kJISx4051Cls_2 , 0x3009), 1.204 + YYYY(kJISx4051Cls_2 , 0x300B), 1.205 + YYYY(kJISx4051Cls_2 , 0x300D), 1.206 + YYYY(kJISx4051Cls_2 , 0x300F), 1.207 + YYYY(kJISx4051Cls_2 , 0x3011), 1.208 + YYYY(kJISx4051Cls_2 , 0x3015), 1.209 + 
YYYY(kJISx4051Cls_2 , 0x3017), 1.210 + YYYY(kJISx4051Cls_2 , 0x3019), 1.211 + YYYY(kJISx4051Cls_2 , 0x301B), 1.212 + YYYY(kJISx4051Cls_2 , 0x301E), 1.213 + YYYY(kJISx4051Cls_2 , 0x301F), 1.214 + 1.215 + // Table 4 1.216 + YYYY(kJISx4051Cls_3 , 0x203C), 1.217 + YYYY(kJISx4051Cls_3 , 0x2044), 1.218 + YYYY(kJISx4051Cls_3 , 0x301C), 1.219 + YYYY(kJISx4051Cls_3 , 0x3041), 1.220 + YYYY(kJISx4051Cls_3 , 0x3043), 1.221 + YYYY(kJISx4051Cls_3 , 0x3045), 1.222 + YYYY(kJISx4051Cls_3 , 0x3047), 1.223 + YYYY(kJISx4051Cls_3 , 0x3049), 1.224 + YYYY(kJISx4051Cls_3 , 0x3063), 1.225 + YYYY(kJISx4051Cls_3 , 0x3083), 1.226 + YYYY(kJISx4051Cls_3 , 0x3085), 1.227 + YYYY(kJISx4051Cls_3 , 0x3087), 1.228 + YYYY(kJISx4051Cls_3 , 0x308E), 1.229 + YYYY(kJISx4051Cls_3 , 0x309D), 1.230 + YYYY(kJISx4051Cls_3 , 0x309E), 1.231 + YYYY(kJISx4051Cls_3 , 0x30A1), 1.232 + YYYY(kJISx4051Cls_3 , 0x30A3), 1.233 + YYYY(kJISx4051Cls_3 , 0x30A5), 1.234 + YYYY(kJISx4051Cls_3 , 0x30A7), 1.235 + YYYY(kJISx4051Cls_3 , 0x30A9), 1.236 + YYYY(kJISx4051Cls_3 , 0x30C3), 1.237 + YYYY(kJISx4051Cls_3 , 0x30E3), 1.238 + YYYY(kJISx4051Cls_3 , 0x30E5), 1.239 + YYYY(kJISx4051Cls_3 , 0x30E7), 1.240 + YYYY(kJISx4051Cls_3 , 0x30EE), 1.241 + YYYY(kJISx4051Cls_3 , 0x30F5), 1.242 + YYYY(kJISx4051Cls_3 , 0x30F6), 1.243 + YYYY(kJISx4051Cls_3 , 0x30FC), 1.244 + YYYY(kJISx4051Cls_3 , 0x30FD), 1.245 + YYYY(kJISx4051Cls_3 , 0x30FE), 1.246 + 1.247 + // Table 5 1.248 + YYYY(kJISx4051Cls_4 , 0x0021), 1.249 + YYYY(kJISx4051Cls_4 , 0x003F), 1.250 + 1.251 + // Table 6 1.252 + YYYY(kJISx4051Cls_5 , 0x003A), 1.253 + YYYY(kJISx4051Cls_5 , 0x003B), 1.254 + YYYY(kJISx4051Cls_5 , 0x30FB), 1.255 + 1.256 + // Table 7 1.257 + YYYY(kJISx4051Cls_6 , 0x002E), 1.258 + YYYY(kJISx4051Cls_6 , 0x3002), 1.259 + 1.260 + // Table 8 1.261 + YYYY(kJISx4051Cls_7 , 0x2014), 1.262 + YYYY(kJISx4051Cls_7 , 0x2024), 1.263 + YYYY(kJISx4051Cls_7 , 0x2025), 1.264 + YYYY(kJISx4051Cls_7 , 0x2026), 1.265 + 1.266 + // Table 9 1.267 + YYYY(kJISx4051Cls_8 , 0x0024), 1.268 + 
YYYY(kJISx4051Cls_8 , 0x00A3), 1.269 + YYYY(kJISx4051Cls_8 , 0x00A5), 1.270 + YYYY(kJISx4051Cls_8 , 0x2116), 1.271 + 1.272 + // Table 10 1.273 + YYYY(kJISx4051Cls_9 , 0x0025), 1.274 + YYYY(kJISx4051Cls_9 , 0x00A2), 1.275 + YYYY(kJISx4051Cls_9 , 0x00B0), 1.276 + YYYY(kJISx4051Cls_9 , 0x2030), 1.277 + YYYY(kJISx4051Cls_9 , 0x2031), 1.278 + YYYY(kJISx4051Cls_9 , 0x2032), 1.279 + YYYY(kJISx4051Cls_9 , 0x2033), 1.280 + 1.281 + // Table 1 1.282 + YYYY(kJISx4051Cls_10, 0x3000), 1.283 + 1.284 + // Table 1 1.285 + ZZZZ(kJISx4051Cls_11, 0x3000), 1.286 + 1.287 + 1.288 + 1.289 +