|
1 |
|
2 |
|
3 |
|
4 /* |
|
5 |
|
6 Simplification of Pair Table in JIS X 4051 |
|
7 |
|
8 1. The Original Table - in 4.1.3 |
|
9 |
|
10 In JIS X 4051, the pair table is defined as below: |
|
11 |
|
12 Class of |
|
13 Leading Class of Trailing Char Class |
|
14 Char |
|
15 |
|
16 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20 |
|
17 * # * # |
|
18 1 X X X X X X X X X X X X X X X X X X X X X E |
|
19 2 X X X X X X |
|
20 3 X X X X X X |
|
21 4 X X X X X X |
|
22 5 X X X X X X |
|
23 6 X X X X X X |
|
24 7 X X X X X X X |
|
25 8 X X X X X X E |
|
26 9 X X X X X X |
|
27 10 X X X X X X |
|
28 11 X X X X X X |
|
29 12 X X X X X X |
|
30 13 X X X X X X X |
|
31 14 X X X X X X X |
|
32 15 X X X X X X X X X |
|
33 16 X X X X X X X X |
|
34 17 X X X X X E |
|
35 18 X X X X X X X X X |
|
36 19 X E E E E E X X X X X X X X X X X X E X E E |
|
37 20 X X X X X E |
|
38 |
|
39 * Same Char |
|
40 # Other Char |
|
41 |
|
42 2. Simplified by removing the classes which we do not care about |
|
43 |
|
44 However, since we do not care about class 13(Subscript), 14(Ruby), |
|
45 19(split line note begin quote), and 20(split line note end quote) |
|
46 we can simplify this pair table into the following: |
|
47 |
|
48 Class of |
|
49 Leading Class of Trailing Char Class |
|
50 Char |
|
51 |
|
52 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18 |
|
53 |
|
54 1 X X X X X X X X X X X X X X X X |
|
55 2 X X X X X |
|
56 3 X X X X X |
|
57 4 X X X X X |
|
58 5 X X X X X |
|
59 6 X X X X X |
|
60 7 X X X X X X |
|
61 8 X X X X X X |
|
62 9 X X X X X |
|
63 10 X X X X X |
|
64 11 X X X X X |
|
65 12 X X X X X |
|
66 15 X X X X X X X X |
|
67 16 X X X X X X X |
|
68 17 X X X X X |
|
69 18 X X X X X X X X |
|
70 |
|
71 3. Simplified by merging classes |
|
72 |
|
73 After the 2nd simplification, the pair table has some duplication: |
|
74 a. classes 2, 3, 4, 5, 6 are the same - we can merge them into [a] |
|
75 b. classes 10, 11, 12, 17 are the same - we can merge them into [b] |
|
76 |
|
77 |
|
78 Class of |
|
79 Leading Class of Trailing Char Class |
|
80 Char |
|
81 |
|
82 1 [a] 7 8 9 [b]15 16 18 |
|
83 |
|
84 1 X X X X X X X X X |
|
85 [a] X |
|
86 7 X X |
|
87 8 X X |
|
88 9 X |
|
89 [b] X |
|
90 15 X X X X |
|
91 16 X X X |
|
92 18 X X X X |
|
93 |
|
94 |
|
95 4. Now we use one bit to encode whether it is breakable, and use 2 bytes |
|
96 for one row, then the bit table will look like: |
|
97 |
|
98 18 <- 1 |
|
99 |
|
100 1 0000 0001 1111 1111 = 0x01FF |
|
101 [a] 0000 0000 0000 0010 = 0x0002 |
|
102 7 0000 0000 0000 0110 = 0x0006 |
|
103 8 0000 0000 0100 0010 = 0x0042 |
|
104 9 0000 0000 0000 0010 = 0x0002 |
|
105 [b] 0000 0000 0000 0010 = 0x0002 |
|
106 15 0000 0001 0101 0010 = 0x0152 |
|
107 16 0000 0001 1000 0010 = 0x0182 |
|
108 18 0000 0001 1100 0010 = 0x01C2 |
|
109 |
|
110 */ |
|
111 |
|
// Bit-encoded form of the merged pair table derived in step 4 of the
// comment above. One 16-bit row per simplified leading-character class;
// within a row, bit i (1 << i) is set where the merged table has an X for
// trailing-character class index i (bit 0 = class 1 ... bit 8 = class 18).
//
// Row order: 1, [a] (2-6), 7, 8, 9, [b] (10, 11, 12, 17), 15, 16, 18.
static const uint16_t gJISx4051SimplifiedPair[9] = {
  0x01FF,  // 1   : 0000 0001 1111 1111
  0x0002,  // [a] : 0000 0000 0000 0010
  0x0006,  // 7   : 0000 0000 0000 0110
  0x0042,  // 8   : 0000 0000 0100 0010
  0x0002,  // 9   : 0000 0000 0000 0010
  0x0002,  // [b] : 0000 0000 0000 0010 (the merged table has a single X in
           //       this row; the previous 0x0042 duplicated row 8)
  0x0152,  // 15  : 0000 0001 0101 0010
  0x0182,  // 16  : 0000 0001 1000 0010
  0x01C2   // 18  : 0000 0001 1100 0010
};
|
115 |
|
116 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1) |
|
117 { |
|
118 NS_ASSERTION( (aCls1 < 9) "invalid class"); |
|
119 NS_ASSERTION( (aCls2 < 9) "invalid class"); |
|
120 return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) )); |
|
121 } |
|
122 |
|
123 |
|
// True when u lies in the inclusive range 0x0030..0x0039 (the ASCII digits
// '0' through '9'). The argument is evaluated twice, so it must not have
// side effects.
#define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
|
125 |
|
126 nsJISx4051Cls XXXX::GetClass( |
|
127 PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0) |
|
128 { |
|
129 // take care the special case in cls 15 |
|
130 if( ((0x2C == aChar) || (0x2E == aChar)) && |
|
131 (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter))) |
|
132 { |
|
133 return kJISx4051Cls_15; |
|
134 } |
|
135 |
|
136 nsJISx4051Cls cls; |
|
137 if(gSingle->Lookup(aChar, &cls)) |
|
138 return cls; |
|
139 |
|
140 if(gRange->Lookup(aChar, &cls)) |
|
141 return cls; |
|
142 |
|
143 return kJISx4051Cls_15; |
|
144 } |
|
145 |
|
146 |
|
// Simplified JIS X 4051 character classes.
//
// Each enumerator is named after the class number used in the standard, and
// its value is the row/bit index of that class in the 9x9 simplified pair
// table. Classes that the simplification merged share a value.
// Classes 13, 14, 19 and 20 are deliberately given no enumerator because the
// simplification drops them.
typedef enum {
  kJISx4051Cls_1  = 0,

  // Merged group [a]: classes 2-6.
  kJISx4051Cls_2  = 1,
  kJISx4051Cls_3  = 1,
  kJISx4051Cls_4  = 1,
  kJISx4051Cls_5  = 1,
  kJISx4051Cls_6  = 1,

  kJISx4051Cls_7  = 2,
  kJISx4051Cls_8  = 3,
  kJISx4051Cls_9  = 4,

  // Merged group [b]: classes 10, 11, 12 and 17.
  kJISx4051Cls_10 = 5,
  kJISx4051Cls_11 = 5,
  kJISx4051Cls_12 = 5,
  kJISx4051Cls_17 = 5,

  kJISx4051Cls_15 = 6,
  kJISx4051Cls_16 = 7,
  kJISx4051Cls_18 = 8
} nsJISx4051Cls;
|
169 |
|
170 |
|
171 // Table 2 |
|
172 YYYY(kJISx4051Cls_1 , 0x0028), |
|
173 YYYY(kJISx4051Cls_1 , 0x005B), |
|
174 YYYY(kJISx4051Cls_1 , 0x007B), |
|
175 YYYY(kJISx4051Cls_1 , 0x2018), |
|
176 YYYY(kJISx4051Cls_1 , 0x201B), |
|
177 YYYY(kJISx4051Cls_1 , 0x201C), |
|
178 YYYY(kJISx4051Cls_1 , 0x201F), |
|
179 YYYY(kJISx4051Cls_1 , 0x3008), |
|
180 YYYY(kJISx4051Cls_1 , 0x300A), |
|
181 YYYY(kJISx4051Cls_1 , 0x300C), |
|
182 YYYY(kJISx4051Cls_1 , 0x300E), |
|
183 YYYY(kJISx4051Cls_1 , 0x3010), |
|
184 YYYY(kJISx4051Cls_1 , 0x3014), |
|
185 YYYY(kJISx4051Cls_1 , 0x3016), |
|
186 YYYY(kJISx4051Cls_1 , 0x3018), |
|
187 YYYY(kJISx4051Cls_1 , 0x301A), |
|
188 YYYY(kJISx4051Cls_1 , 0x301D), |
|
189 |
|
190 // Table 3 |
|
191 YYYY(kJISx4051Cls_2 , 0x0029), |
|
192 YYYY(kJISx4051Cls_2 , 0x002C), |
|
193 YYYY(kJISx4051Cls_2 , 0x005D), |
|
194 YYYY(kJISx4051Cls_2 , 0x007D), |
|
195 YYYY(kJISx4051Cls_2 , 0x2019), |
|
196 YYYY(kJISx4051Cls_2 , 0x201A), |
|
197 YYYY(kJISx4051Cls_2 , 0x201D), |
|
198 YYYY(kJISx4051Cls_2 , 0x201E), |
|
199 YYYY(kJISx4051Cls_2 , 0x3001), |
|
200 YYYY(kJISx4051Cls_2 , 0x3009), |
|
201 YYYY(kJISx4051Cls_2 , 0x300B), |
|
202 YYYY(kJISx4051Cls_2 , 0x300D), |
|
203 YYYY(kJISx4051Cls_2 , 0x300F), |
|
204 YYYY(kJISx4051Cls_2 , 0x3011), |
|
205 YYYY(kJISx4051Cls_2 , 0x3015), |
|
206 YYYY(kJISx4051Cls_2 , 0x3017), |
|
207 YYYY(kJISx4051Cls_2 , 0x3019), |
|
208 YYYY(kJISx4051Cls_2 , 0x301B), |
|
209 YYYY(kJISx4051Cls_2 , 0x301E), |
|
210 YYYY(kJISx4051Cls_2 , 0x301F), |
|
211 |
|
212 // Table 4 |
|
213 YYYY(kJISx4051Cls_3 , 0x203C), |
|
214 YYYY(kJISx4051Cls_3 , 0x2044), |
|
215 YYYY(kJISx4051Cls_3 , 0x301C), |
|
216 YYYY(kJISx4051Cls_3 , 0x3041), |
|
217 YYYY(kJISx4051Cls_3 , 0x3043), |
|
218 YYYY(kJISx4051Cls_3 , 0x3045), |
|
219 YYYY(kJISx4051Cls_3 , 0x3047), |
|
220 YYYY(kJISx4051Cls_3 , 0x3049), |
|
221 YYYY(kJISx4051Cls_3 , 0x3063), |
|
222 YYYY(kJISx4051Cls_3 , 0x3083), |
|
223 YYYY(kJISx4051Cls_3 , 0x3085), |
|
224 YYYY(kJISx4051Cls_3 , 0x3087), |
|
225 YYYY(kJISx4051Cls_3 , 0x308E), |
|
226 YYYY(kJISx4051Cls_3 , 0x309D), |
|
227 YYYY(kJISx4051Cls_3 , 0x309E), |
|
228 YYYY(kJISx4051Cls_3 , 0x30A1), |
|
229 YYYY(kJISx4051Cls_3 , 0x30A3), |
|
230 YYYY(kJISx4051Cls_3 , 0x30A5), |
|
231 YYYY(kJISx4051Cls_3 , 0x30A7), |
|
232 YYYY(kJISx4051Cls_3 , 0x30A9), |
|
233 YYYY(kJISx4051Cls_3 , 0x30C3), |
|
234 YYYY(kJISx4051Cls_3 , 0x30E3), |
|
235 YYYY(kJISx4051Cls_3 , 0x30E5), |
|
236 YYYY(kJISx4051Cls_3 , 0x30E7), |
|
237 YYYY(kJISx4051Cls_3 , 0x30EE), |
|
238 YYYY(kJISx4051Cls_3 , 0x30F5), |
|
239 YYYY(kJISx4051Cls_3 , 0x30F6), |
|
240 YYYY(kJISx4051Cls_3 , 0x30FC), |
|
241 YYYY(kJISx4051Cls_3 , 0x30FD), |
|
242 YYYY(kJISx4051Cls_3 , 0x30FE), |
|
243 |
|
244 // Table 5 |
|
245 YYYY(kJISx4051Cls_4 , 0x0021), |
|
246 YYYY(kJISx4051Cls_4 , 0x003F), |
|
247 |
|
248 // Table 6 |
|
249 YYYY(kJISx4051Cls_5 , 0x003A), |
|
250 YYYY(kJISx4051Cls_5 , 0x003B), |
|
251 YYYY(kJISx4051Cls_5 , 0x30FB), |
|
252 |
|
253 // Table 7 |
|
254 YYYY(kJISx4051Cls_6 , 0x002E), |
|
255 YYYY(kJISx4051Cls_6 , 0x3002), |
|
256 |
|
257 // Table 8 |
|
258 YYYY(kJISx4051Cls_7 , 0x2014), |
|
259 YYYY(kJISx4051Cls_7 , 0x2024), |
|
260 YYYY(kJISx4051Cls_7 , 0x2025), |
|
261 YYYY(kJISx4051Cls_7 , 0x2026), |
|
262 |
|
263 // Table 9 |
|
264 YYYY(kJISx4051Cls_8 , 0x0024), |
|
265 YYYY(kJISx4051Cls_8 , 0x00A3), |
|
266 YYYY(kJISx4051Cls_8 , 0x00A5), |
|
267 YYYY(kJISx4051Cls_8 , 0x2116), |
|
268 |
|
269 // Table 10 |
|
270 YYYY(kJISx4051Cls_9 , 0x0025), |
|
271 YYYY(kJISx4051Cls_9 , 0x00A2), |
|
272 YYYY(kJISx4051Cls_9 , 0x00B0), |
|
273 YYYY(kJISx4051Cls_9 , 0x2030), |
|
274 YYYY(kJISx4051Cls_9 , 0x2031), |
|
275 YYYY(kJISx4051Cls_9 , 0x2032), |
|
276 YYYY(kJISx4051Cls_9 , 0x2033), |
|
277 |
|
278 // Table 11 |
|
279 YYYY(kJISx4051Cls_10, 0x3000), |
|
280 |
|
281 // Table 12 |
|
282 ZZZZ(kJISx4051Cls_11, 0x3000), |
|
283 |
|
284 |
|
285 |
|
286 |