intl/lwbrk/src/jisx4051pairtable.txt

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1
michael@0 2
michael@0 3
michael@0 4 /*
michael@0 5
michael@0 6 Simplification of Pair Table in JIS X 4051
michael@0 7
michael@0 8 1. The Original Table - in 4.1.3
michael@0 9
michael@0 10 In JIS X 4051, the pair table is defined as below
michael@0 11
michael@0 12 Class of
michael@0 13 Leading Class of Trailing Char Class
michael@0 14 Char
michael@0 15
michael@0 16 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
michael@0 17 * # * #
michael@0 18 1 X X X X X X X X X X X X X X X X X X X X X E
michael@0 19 2 X X X X X X
michael@0 20 3 X X X X X X
michael@0 21 4 X X X X X X
michael@0 22 5 X X X X X X
michael@0 23 6 X X X X X X
michael@0 24 7 X X X X X X X
michael@0 25 8 X X X X X X E
michael@0 26 9 X X X X X X
michael@0 27 10 X X X X X X
michael@0 28 11 X X X X X X
michael@0 29 12 X X X X X X
michael@0 30 13 X X X X X X X
michael@0 31 14 X X X X X X X
michael@0 32 15 X X X X X X X X X
michael@0 33 16 X X X X X X X X
michael@0 34 17 X X X X X E
michael@0 35 18 X X X X X X X X X
michael@0 36 19 X E E E E E X X X X X X X X X X X X E X E E
michael@0 37 20 X X X X X E
michael@0 38
michael@0 39 * Same Char
michael@0 40 # Other Char
michael@0 41
michael@0 42 2. Simplified by removing the classes which we do not care about
michael@0 43
michael@0 44 However, since we do not care about class 13(Subscript), 14(Ruby),
michael@0 45 19(split line note begin quote), and 20(split line note end quote)
michael@0 46 we can simplify this pair table into the following
michael@0 47
michael@0 48 Class of
michael@0 49 Leading Class of Trailing Char Class
michael@0 50 Char
michael@0 51
michael@0 52 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18
michael@0 53
michael@0 54 1 X X X X X X X X X X X X X X X X
michael@0 55 2 X X X X X
michael@0 56 3 X X X X X
michael@0 57 4 X X X X X
michael@0 58 5 X X X X X
michael@0 59 6 X X X X X
michael@0 60 7 X X X X X X
michael@0 61 8 X X X X X X
michael@0 62 9 X X X X X
michael@0 63 10 X X X X X
michael@0 64 11 X X X X X
michael@0 65 12 X X X X X
michael@0 66 15 X X X X X X X X
michael@0 67 16 X X X X X X X
michael@0 68 17 X X X X X
michael@0 69 18 X X X X X X X X
michael@0 70
michael@0 71 3. Simplified by merging classes
michael@0 72
michael@0 73 After the second simplification, the pair table has some duplication
michael@0 74 a. classes 2, 3, 4, 5, 6 are the same - we can merge them
michael@0 75 b. classes 10, 11, 12, 17 are the same - we can merge them
michael@0 76
michael@0 77
michael@0 78 Class of
michael@0 79 Leading Class of Trailing Char Class
michael@0 80 Char
michael@0 81
michael@0 82 1 [a] 7 8 9 [b]15 16 18
michael@0 83
michael@0 84 1 X X X X X X X X X
michael@0 85 [a] X
michael@0 86 7 X X
michael@0 87 8 X X
michael@0 88 9 X
michael@0 89 [b] X
michael@0 90 15 X X X X
michael@0 91 16 X X X
michael@0 92 18 X X X X
michael@0 93
michael@0 94
michael@0 95 4. Now we use one bit to encode whether it is breakable, and use 2 bytes
michael@0 96 for one row; the bit table will then look like:
michael@0 97
michael@0 98 18 <- 1
michael@0 99
michael@0 100 1 0000 0001 1111 1111 = 0x01FF
michael@0 101 [a] 0000 0000 0000 0010 = 0x0002
michael@0 102 7 0000 0000 0000 0110 = 0x0006
michael@0 103 8 0000 0000 0100 0010 = 0x0042
michael@0 104 9 0000 0000 0000 0010 = 0x0002
michael@0 105 [b] 0000 0000 0000 0010 = 0x0002
michael@0 106 15 0000 0001 0101 0010 = 0x0152
michael@0 107 16 0000 0001 1000 0010 = 0x0182
michael@0 108 18 0000 0001 1100 0010 = 0x01C2
michael@0 109
michael@0 110 */
michael@0 111
michael@0 112 static uint16_t gJISx4051SimplifiedPair[9] = {
michael@0 113 0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0002, 0x0152, 0x0182, 0x01C2
michael@0 114 };
michael@0 115
michael@0 116 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls2)
michael@0 117 {
michael@0 118 NS_ASSERTION( (aCls1 < 9), "invalid class");
michael@0 119 NS_ASSERTION( (aCls2 < 9), "invalid class");
michael@0 120 return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
michael@0 121 }
michael@0 122
michael@0 123
michael@0 124 #define X4051_IS_DIGIT(u) ((0x0030 <= (u)) && ((u) <= 0x0039))
michael@0 125
michael@0 126 nsJISx4051Cls XXXX::GetClass(
michael@0 127 PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
michael@0 128 {
michael@0 129 // take care of the special case in cls 15
michael@0 130 if( ((0x2C == aChar) || (0x2E == aChar)) &&
michael@0 131 (X4051_IS_DIGIT(aBefore) && X4051_IS_DIGIT(aAfter)))
michael@0 132 {
michael@0 133 return kJISx4051Cls_15;
michael@0 134 }
michael@0 135
michael@0 136 nsJISx4051Cls cls;
michael@0 137 if(gSingle->Lookup(aChar, &cls))
michael@0 138 return cls;
michael@0 139
michael@0 140 if(gRange->Lookup(aChar, &cls))
michael@0 141 return cls;
michael@0 142
michael@0 143 return kJISx4051Cls_15;
michael@0 144 }
michael@0 145
michael@0 146
michael@0 147 typedef enum {
michael@0 148 kJISx4051Cls_1 = 0,
michael@0 149 kJISx4051Cls_2 = 1,
michael@0 150 kJISx4051Cls_3 = 1,
michael@0 151 kJISx4051Cls_4 = 1,
michael@0 152 kJISx4051Cls_5 = 1,
michael@0 153 kJISx4051Cls_6 = 1,
michael@0 154 kJISx4051Cls_7 = 2,
michael@0 155 kJISx4051Cls_8 = 3,
michael@0 156 kJISx4051Cls_9 = 4,
michael@0 157 kJISx4051Cls_10 = 5,
michael@0 158 kJISx4051Cls_11 = 5,
michael@0 159 kJISx4051Cls_12 = 5,
michael@0 160 // kJISx4051Cls_13 = 0,
michael@0 161 // kJISx4051Cls_14 = 0,
michael@0 162 kJISx4051Cls_15 = 6,
michael@0 163 kJISx4051Cls_16 = 7,
michael@0 164 kJISx4051Cls_17 = 5,
michael@0 165 kJISx4051Cls_18 = 8,
michael@0 166 // kJISx4051Cls_19 = 0,
michael@0 167 // kJISx4051Cls_20 = 0
michael@0 168 } nsJISx4051Cls;
michael@0 169
michael@0 170
michael@0 171 // Table 2
michael@0 172 YYYY(kJISx4051Cls_1 , 0x0028),
michael@0 173 YYYY(kJISx4051Cls_1 , 0x005B),
michael@0 174 YYYY(kJISx4051Cls_1 , 0x007B),
michael@0 175 YYYY(kJISx4051Cls_1 , 0x2018),
michael@0 176 YYYY(kJISx4051Cls_1 , 0x201B),
michael@0 177 YYYY(kJISx4051Cls_1 , 0x201C),
michael@0 178 YYYY(kJISx4051Cls_1 , 0x201F),
michael@0 179 YYYY(kJISx4051Cls_1 , 0x3008),
michael@0 180 YYYY(kJISx4051Cls_1 , 0x300A),
michael@0 181 YYYY(kJISx4051Cls_1 , 0x300C),
michael@0 182 YYYY(kJISx4051Cls_1 , 0x300E),
michael@0 183 YYYY(kJISx4051Cls_1 , 0x3010),
michael@0 184 YYYY(kJISx4051Cls_1 , 0x3014),
michael@0 185 YYYY(kJISx4051Cls_1 , 0x3016),
michael@0 186 YYYY(kJISx4051Cls_1 , 0x3018),
michael@0 187 YYYY(kJISx4051Cls_1 , 0x301A),
michael@0 188 YYYY(kJISx4051Cls_1 , 0x301D),
michael@0 189
michael@0 190 // Table 3
michael@0 191 YYYY(kJISx4051Cls_2 , 0x0029),
michael@0 192 YYYY(kJISx4051Cls_2 , 0x002C),
michael@0 193 YYYY(kJISx4051Cls_2 , 0x005D),
michael@0 194 YYYY(kJISx4051Cls_2 , 0x007D),
michael@0 195 YYYY(kJISx4051Cls_2 , 0x2019),
michael@0 196 YYYY(kJISx4051Cls_2 , 0x201A),
michael@0 197 YYYY(kJISx4051Cls_2 , 0x201D),
michael@0 198 YYYY(kJISx4051Cls_2 , 0x201E),
michael@0 199 YYYY(kJISx4051Cls_2 , 0x3001),
michael@0 200 YYYY(kJISx4051Cls_2 , 0x3009),
michael@0 201 YYYY(kJISx4051Cls_2 , 0x300B),
michael@0 202 YYYY(kJISx4051Cls_2 , 0x300D),
michael@0 203 YYYY(kJISx4051Cls_2 , 0x300F),
michael@0 204 YYYY(kJISx4051Cls_2 , 0x3011),
michael@0 205 YYYY(kJISx4051Cls_2 , 0x3015),
michael@0 206 YYYY(kJISx4051Cls_2 , 0x3017),
michael@0 207 YYYY(kJISx4051Cls_2 , 0x3019),
michael@0 208 YYYY(kJISx4051Cls_2 , 0x301B),
michael@0 209 YYYY(kJISx4051Cls_2 , 0x301E),
michael@0 210 YYYY(kJISx4051Cls_2 , 0x301F),
michael@0 211
michael@0 212 // Table 4
michael@0 213 YYYY(kJISx4051Cls_3 , 0x203C),
michael@0 214 YYYY(kJISx4051Cls_3 , 0x2044),
michael@0 215 YYYY(kJISx4051Cls_3 , 0x301C),
michael@0 216 YYYY(kJISx4051Cls_3 , 0x3041),
michael@0 217 YYYY(kJISx4051Cls_3 , 0x3043),
michael@0 218 YYYY(kJISx4051Cls_3 , 0x3045),
michael@0 219 YYYY(kJISx4051Cls_3 , 0x3047),
michael@0 220 YYYY(kJISx4051Cls_3 , 0x3049),
michael@0 221 YYYY(kJISx4051Cls_3 , 0x3063),
michael@0 222 YYYY(kJISx4051Cls_3 , 0x3083),
michael@0 223 YYYY(kJISx4051Cls_3 , 0x3085),
michael@0 224 YYYY(kJISx4051Cls_3 , 0x3087),
michael@0 225 YYYY(kJISx4051Cls_3 , 0x308E),
michael@0 226 YYYY(kJISx4051Cls_3 , 0x309D),
michael@0 227 YYYY(kJISx4051Cls_3 , 0x309E),
michael@0 228 YYYY(kJISx4051Cls_3 , 0x30A1),
michael@0 229 YYYY(kJISx4051Cls_3 , 0x30A3),
michael@0 230 YYYY(kJISx4051Cls_3 , 0x30A5),
michael@0 231 YYYY(kJISx4051Cls_3 , 0x30A7),
michael@0 232 YYYY(kJISx4051Cls_3 , 0x30A9),
michael@0 233 YYYY(kJISx4051Cls_3 , 0x30C3),
michael@0 234 YYYY(kJISx4051Cls_3 , 0x30E3),
michael@0 235 YYYY(kJISx4051Cls_3 , 0x30E5),
michael@0 236 YYYY(kJISx4051Cls_3 , 0x30E7),
michael@0 237 YYYY(kJISx4051Cls_3 , 0x30EE),
michael@0 238 YYYY(kJISx4051Cls_3 , 0x30F5),
michael@0 239 YYYY(kJISx4051Cls_3 , 0x30F6),
michael@0 240 YYYY(kJISx4051Cls_3 , 0x30FC),
michael@0 241 YYYY(kJISx4051Cls_3 , 0x30FD),
michael@0 242 YYYY(kJISx4051Cls_3 , 0x30FE),
michael@0 243
michael@0 244 // Table 5
michael@0 245 YYYY(kJISx4051Cls_4 , 0x0021),
michael@0 246 YYYY(kJISx4051Cls_4 , 0x003F),
michael@0 247
michael@0 248 // Table 6
michael@0 249 YYYY(kJISx4051Cls_5 , 0x003A),
michael@0 250 YYYY(kJISx4051Cls_5 , 0x003B),
michael@0 251 YYYY(kJISx4051Cls_5 , 0x30FB),
michael@0 252
michael@0 253 // Table 7
michael@0 254 YYYY(kJISx4051Cls_6 , 0x002E),
michael@0 255 YYYY(kJISx4051Cls_6 , 0x3002),
michael@0 256
michael@0 257 // Table 8
michael@0 258 YYYY(kJISx4051Cls_7 , 0x2014),
michael@0 259 YYYY(kJISx4051Cls_7 , 0x2024),
michael@0 260 YYYY(kJISx4051Cls_7 , 0x2025),
michael@0 261 YYYY(kJISx4051Cls_7 , 0x2026),
michael@0 262
michael@0 263 // Table 9
michael@0 264 YYYY(kJISx4051Cls_8 , 0x0024),
michael@0 265 YYYY(kJISx4051Cls_8 , 0x00A3),
michael@0 266 YYYY(kJISx4051Cls_8 , 0x00A5),
michael@0 267 YYYY(kJISx4051Cls_8 , 0x2116),
michael@0 268
michael@0 269 // Table 10
michael@0 270 YYYY(kJISx4051Cls_9 , 0x0025),
michael@0 271 YYYY(kJISx4051Cls_9 , 0x00A2),
michael@0 272 YYYY(kJISx4051Cls_9 , 0x00B0),
michael@0 273 YYYY(kJISx4051Cls_9 , 0x2030),
michael@0 274 YYYY(kJISx4051Cls_9 , 0x2031),
michael@0 275 YYYY(kJISx4051Cls_9 , 0x2032),
michael@0 276 YYYY(kJISx4051Cls_9 , 0x2033),
michael@0 277
michael@0 278 // Table 1
michael@0 279 YYYY(kJISx4051Cls_10, 0x3000),
michael@0 280
michael@0 281 // Table 1
michael@0 282 ZZZZ(kJISx4051Cls_11, 0x3000),
michael@0 283
michael@0 284
michael@0 285
michael@0 286

mercurial