intl/lwbrk/src/jisx4051pairtable.txt

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:d2b8643877b0
1
2
3
4 /*
5
6 Simplification of Pair Table in JIS X 4051
7
8 1. The Original Table - in 4.1.3
9
10 In JIS X 4051, the pair table is defined as below
11
12 Class of
13 Leading Class of Trailing Char Class
14 Char
15
16 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
17 * # * #
18 1 X X X X X X X X X X X X X X X X X X X X X E
19 2 X X X X X X
20 3 X X X X X X
21 4 X X X X X X
22 5 X X X X X X
23 6 X X X X X X
24 7 X X X X X X X
25 8 X X X X X X E
26 9 X X X X X X
27 10 X X X X X X
28 11 X X X X X X
29 12 X X X X X X
30 13 X X X X X X X
31 14 X X X X X X X
32 15 X X X X X X X X X
33 16 X X X X X X X X
34 17 X X X X X E
35 18 X X X X X X X X X
36 19 X E E E E E X X X X X X X X X X X X E X E E
37 20 X X X X X E
38
39 * Same Char
40 # Other Char
41
42 2. Simplified by removing the classes which we do not care about
43
44 However, since we do not care about class 13(Subscript), 14(Ruby),
45 19(split line note begin quote), and 20(split line note end quote)
46 we can simplify this pair table into the following
47
48 Class of
49 Leading Class of Trailing Char Class
50 Char
51
52 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18
53
54 1 X X X X X X X X X X X X X X X X
55 2 X X X X X
56 3 X X X X X
57 4 X X X X X
58 5 X X X X X
59 6 X X X X X
60 7 X X X X X X
61 8 X X X X X X
62 9 X X X X X
63 10 X X X X X
64 11 X X X X X
65 12 X X X X X
66 15 X X X X X X X X
67 16 X X X X X X X
68 17 X X X X X
69 18 X X X X X X X X
70
71 3. Simplified by merging classes
72
73 After the simplification in step 2, the pair table has some duplicated rows
74 a. classes 2, 3, 4, 5, 6 are the same - we can merge them
75 b. classes 10, 11, 12, 17 are the same - we can merge them
76
77
78 Class of
79 Leading Class of Trailing Char Class
80 Char
81
82 1 [a] 7 8 9 [b]15 16 18
83
84 1 X X X X X X X X X
85 [a] X
86 7 X X
87 8 X X
88 9 X
89 [b] X
90 15 X X X X
91 16 X X X
92 18 X X X X
93
94
95 4. Now we use one bit to encode whether it is breakable, and use 2 bytes
96 for one row, then the bit table will look like:
97
98 18 <- 1
99
100 1 0000 0001 1111 1111 = 0x01FF
101 [a] 0000 0000 0000 0010 = 0x0002
102 7 0000 0000 0000 0110 = 0x0006
103 8 0000 0000 0100 0010 = 0x0042
104 9 0000 0000 0000 0010 = 0x0002
105 [b] 0000 0000 0000 0010 = 0x0002
106 15 0000 0001 0101 0010 = 0x0152
107 16 0000 0001 1000 0010 = 0x0182
108 18 0000 0001 1100 0010 = 0x01C2
109
110 */
111
// Simplified JIS X 4051 pair table, one 16-bit row per merged class.
//
// Row index = simplified class of the leading character,
// bit index = simplified class of the trailing character (bit 0 = class 1).
// Slot order: 0 = class 1, 1 = [a] (classes 2-6), 2 = class 7, 3 = class 8,
// 4 = class 9, 5 = [b] (classes 10, 11, 12, 17), 6 = class 15,
// 7 = class 16, 8 = class 18.
//
// The [b] row is 0x0002: the merged table marks a single column for [b]
// and its bit pattern is 0000 0000 0000 0010. The previous value 0x0042
// duplicated the class 8 row.
static const uint16_t gJISx4051SimplifiedPair[9] = {
  0x01FF,  // 1
  0x0002,  // [a]
  0x0006,  // 7
  0x0042,  // 8
  0x0002,  // 9
  0x0002,  // [b]
  0x0152,  // 15
  0x0182,  // 16
  0x01C2   // 18
};
115
116 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1)
117 {
118 NS_ASSERTION( (aCls1 < 9) "invalid class");
119 NS_ASSERTION( (aCls2 < 9) "invalid class");
120 return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
121 }
122
123
// True when u is in the range 0x0030..0x0039 ('0'..'9').
// The comparisons used to be inverted (0x0030 >= u && u >= 0x0039), which
// no value can satisfy. Note that u is evaluated twice.
#define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
125
126 nsJISx4051Cls XXXX::GetClass(
127 PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
128 {
129 // take care the special case in cls 15
130 if( ((0x2C == aChar) || (0x2E == aChar)) &&
131 (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter)))
132 {
133 return kJISx4051Cls_15;
134 }
135
136 nsJISx4051Cls cls;
137 if(gSingle->Lookup(aChar, &cls))
138 return cls;
139
140 if(gRange->Lookup(aChar, &cls))
141 return cls;
142
143 return kJISx4051Cls_15;
144 }
145
146
147 typedef enum { // simplified JIS X 4051 classes; each value is a row / bit index into the 9-entry pair table
148 kJISx4051Cls_1 = 0,
149 kJISx4051Cls_2 = 1, // classes 2-6 share merged slot [a]
150 kJISx4051Cls_3 = 1, // merged slot [a]
151 kJISx4051Cls_4 = 1, // merged slot [a]
152 kJISx4051Cls_5 = 1, // merged slot [a]
153 kJISx4051Cls_6 = 1, // merged slot [a]
154 kJISx4051Cls_7 = 2,
155 kJISx4051Cls_8 = 3,
156 kJISx4051Cls_9 = 4,
157 kJISx4051Cls_10 = 5, // classes 10, 11, 12 and 17 share merged slot [b]
158 kJISx4051Cls_11 = 5, // merged slot [b]
159 kJISx4051Cls_12 = 5, // merged slot [b]
160 // kJISx4051Cls_13 = 0, (subscript - class dropped from the simplified table)
161 // kJISx4051Cls_14 = 0, (ruby - class dropped from the simplified table)
162 kJISx4051Cls_15 = 6,
163 kJISx4051Cls_16 = 7,
164 kJISx4051Cls_17 = 5, // merged slot [b], same value as classes 10-12
165 kJISx4051Cls_18 = 8, // highest slot; the pair table has 9 rows (0..8)
166 // kJISx4051Cls_19 = 0, (split line note begin quote - class dropped)
167 // kJISx4051Cls_20 = 0 (split line note end quote - class dropped)
168 } nsJISx4051Cls;
169
170
171 // Table 2
172 YYYY(kJISx4051Cls_1 , 0x0028),
173 YYYY(kJISx4051Cls_1 , 0x005B),
174 YYYY(kJISx4051Cls_1 , 0x007B),
175 YYYY(kJISx4051Cls_1 , 0x2018),
176 YYYY(kJISx4051Cls_1 , 0x201B),
177 YYYY(kJISx4051Cls_1 , 0x201C),
178 YYYY(kJISx4051Cls_1 , 0x201F),
179 YYYY(kJISx4051Cls_1 , 0x3008),
180 YYYY(kJISx4051Cls_1 , 0x300A),
181 YYYY(kJISx4051Cls_1 , 0x300C),
182 YYYY(kJISx4051Cls_1 , 0x300E),
183 YYYY(kJISx4051Cls_1 , 0x3010),
184 YYYY(kJISx4051Cls_1 , 0x3014),
185 YYYY(kJISx4051Cls_1 , 0x3016),
186 YYYY(kJISx4051Cls_1 , 0x3018),
187 YYYY(kJISx4051Cls_1 , 0x301A),
188 YYYY(kJISx4051Cls_1 , 0x301D),
189
190 // Table 3
191 YYYY(kJISx4051Cls_2 , 0x0029),
192 YYYY(kJISx4051Cls_2 , 0x002C),
193 YYYY(kJISx4051Cls_2 , 0x005D),
194 YYYY(kJISx4051Cls_2 , 0x007D),
195 YYYY(kJISx4051Cls_2 , 0x2019),
196 YYYY(kJISx4051Cls_2 , 0x201A),
197 YYYY(kJISx4051Cls_2 , 0x201D),
198 YYYY(kJISx4051Cls_2 , 0x201E),
199 YYYY(kJISx4051Cls_2 , 0x3001),
200 YYYY(kJISx4051Cls_2 , 0x3009),
201 YYYY(kJISx4051Cls_2 , 0x300B),
202 YYYY(kJISx4051Cls_2 , 0x300D),
203 YYYY(kJISx4051Cls_2 , 0x300F),
204 YYYY(kJISx4051Cls_2 , 0x3011),
205 YYYY(kJISx4051Cls_2 , 0x3015),
206 YYYY(kJISx4051Cls_2 , 0x3017),
207 YYYY(kJISx4051Cls_2 , 0x3019),
208 YYYY(kJISx4051Cls_2 , 0x301B),
209 YYYY(kJISx4051Cls_2 , 0x301E),
210 YYYY(kJISx4051Cls_2 , 0x301F),
211
212 // Table 4
213 YYYY(kJISx4051Cls_3 , 0x203C),
214 YYYY(kJISx4051Cls_3 , 0x2044),
215 YYYY(kJISx4051Cls_3 , 0x301C),
216 YYYY(kJISx4051Cls_3 , 0x3041),
217 YYYY(kJISx4051Cls_3 , 0x3043),
218 YYYY(kJISx4051Cls_3 , 0x3045),
219 YYYY(kJISx4051Cls_3 , 0x3047),
220 YYYY(kJISx4051Cls_3 , 0x3049),
221 YYYY(kJISx4051Cls_3 , 0x3063),
222 YYYY(kJISx4051Cls_3 , 0x3083),
223 YYYY(kJISx4051Cls_3 , 0x3085),
224 YYYY(kJISx4051Cls_3 , 0x3087),
225 YYYY(kJISx4051Cls_3 , 0x308E),
226 YYYY(kJISx4051Cls_3 , 0x309D),
227 YYYY(kJISx4051Cls_3 , 0x309E),
228 YYYY(kJISx4051Cls_3 , 0x30A1),
229 YYYY(kJISx4051Cls_3 , 0x30A3),
230 YYYY(kJISx4051Cls_3 , 0x30A5),
231 YYYY(kJISx4051Cls_3 , 0x30A7),
232 YYYY(kJISx4051Cls_3 , 0x30A9),
233 YYYY(kJISx4051Cls_3 , 0x30C3),
234 YYYY(kJISx4051Cls_3 , 0x30E3),
235 YYYY(kJISx4051Cls_3 , 0x30E5),
236 YYYY(kJISx4051Cls_3 , 0x30E7),
237 YYYY(kJISx4051Cls_3 , 0x30EE),
238 YYYY(kJISx4051Cls_3 , 0x30F5),
239 YYYY(kJISx4051Cls_3 , 0x30F6),
240 YYYY(kJISx4051Cls_3 , 0x30FC),
241 YYYY(kJISx4051Cls_3 , 0x30FD),
242 YYYY(kJISx4051Cls_3 , 0x30FE),
243
244 // Table 5
245 YYYY(kJISx4051Cls_4 , 0x0021),
246 YYYY(kJISx4051Cls_4 , 0x003F),
247
248 // Table 6
249 YYYY(kJISx4051Cls_5 , 0x003A),
250 YYYY(kJISx4051Cls_5 , 0x003B),
251 YYYY(kJISx4051Cls_5 , 0x30FB),
252
253 // Table 7
254 YYYY(kJISx4051Cls_6 , 0x002E),
255 YYYY(kJISx4051Cls_6 , 0x3002),
256
257 // Table 8
258 YYYY(kJISx4051Cls_7 , 0x2014),
259 YYYY(kJISx4051Cls_7 , 0x2024),
260 YYYY(kJISx4051Cls_7 , 0x2025),
261 YYYY(kJISx4051Cls_7 , 0x2026),
262
263 // Table 9
264 YYYY(kJISx4051Cls_8 , 0x0024),
265 YYYY(kJISx4051Cls_8 , 0x00A3),
266 YYYY(kJISx4051Cls_8 , 0x00A5),
267 YYYY(kJISx4051Cls_8 , 0x2116),
268
269 // Table 10
270 YYYY(kJISx4051Cls_9 , 0x0025),
271 YYYY(kJISx4051Cls_9 , 0x00A2),
272 YYYY(kJISx4051Cls_9 , 0x00B0),
273 YYYY(kJISx4051Cls_9 , 0x2030),
274 YYYY(kJISx4051Cls_9 , 0x2031),
275 YYYY(kJISx4051Cls_9 , 0x2032),
276 YYYY(kJISx4051Cls_9 , 0x2033),
277
278 // Table 1
279 YYYY(kJISx4051Cls_10, 0x3000),
280
281 // Table 1
282 ZZZZ(kJISx4051Cls_11, 0x3000),
283
284
285
286

mercurial