Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include "nsUnicodeRange.h" |
michael@0 | 7 | #include "nsGkAtoms.h" |
michael@0 | 8 | #include "mozilla/NullPtr.h" |
michael@0 | 9 | |
michael@0 | 10 | // Maps a unicode range value to the address of its language-group atom. |
michael@0 | 11 | // Each item's index must correspond to its unicode range value, |
michael@0 | 12 | // e.g. x-cyrillic = gUnicodeRangeToLangGroupAtomTable[kRangeCyrillic] |
michael@0 | 13 | static nsIAtom **gUnicodeRangeToLangGroupAtomTable[] = |
michael@0 | 14 | { |
michael@0 | 15 | &nsGkAtoms::x_cyrillic, |
michael@0 | 16 | &nsGkAtoms::el_, |
michael@0 | 17 | &nsGkAtoms::tr, |
michael@0 | 18 | &nsGkAtoms::he, |
michael@0 | 19 | &nsGkAtoms::ar, |
michael@0 | 20 | &nsGkAtoms::x_baltic, |
michael@0 | 21 | &nsGkAtoms::th, |
michael@0 | 22 | &nsGkAtoms::ko, |
michael@0 | 23 | &nsGkAtoms::Japanese, |
michael@0 | 24 | &nsGkAtoms::zh_cn, |
michael@0 | 25 | &nsGkAtoms::zh_tw, |
michael@0 | 26 | &nsGkAtoms::x_devanagari, |
michael@0 | 27 | &nsGkAtoms::x_tamil, |
michael@0 | 28 | &nsGkAtoms::x_armn, |
michael@0 | 29 | &nsGkAtoms::x_beng, |
michael@0 | 30 | &nsGkAtoms::x_cans, |
michael@0 | 31 | &nsGkAtoms::x_ethi, |
michael@0 | 32 | &nsGkAtoms::x_geor, |
michael@0 | 33 | &nsGkAtoms::x_gujr, |
michael@0 | 34 | &nsGkAtoms::x_guru, |
michael@0 | 35 | &nsGkAtoms::x_khmr, |
michael@0 | 36 | &nsGkAtoms::x_mlym, |
michael@0 | 37 | &nsGkAtoms::x_orya, |
michael@0 | 38 | &nsGkAtoms::x_telu, |
michael@0 | 39 | &nsGkAtoms::x_knda, |
michael@0 | 40 | &nsGkAtoms::x_sinh, |
michael@0 | 41 | &nsGkAtoms::x_tibt |
michael@0 | 42 | }; |
michael@0 | 43 | |
michael@0 | 44 | /********************************************************************** |
michael@0 | 45 | * Unicode subranges as defined in unicode 3.0 |
michael@0 | 46 | * x-western, x-central-euro, tr, x-baltic -> latin |
michael@0 | 47 | * 0000 - 036f |
michael@0 | 48 | * 1e00 - 1eff |
michael@0 | 49 | * 2000 - 206f (general punctuation) |
michael@0 | 50 | * 20a0 - 20cf (currency symbols) |
michael@0 | 51 | * 2100 - 214f (letterlike symbols) |
michael@0 | 52 | * 2150 - 218f (Number Forms) |
michael@0 | 53 | * el -> greek |
michael@0 | 54 | * 0370 - 03ff |
michael@0 | 55 | * 1f00 - 1fff |
michael@0 | 56 | * x-cyrillic -> cyrillic |
michael@0 | 57 | * 0400 - 04ff |
michael@0 | 58 | * he -> hebrew |
michael@0 | 59 | * 0590 - 05ff |
michael@0 | 60 | * ar -> arabic |
michael@0 | 61 | * 0600 - 06ff |
michael@0 | 62 | * fb50 - fdff (arabic presentation forms) |
michael@0 | 63 | * fe70 - feff (arabic presentation forms b) |
michael@0 | 64 | * th -> thai |
michael@0 | 65 | * 0e00 - 0e7f |
michael@0 | 66 | * ko -> korean |
michael@0 | 67 | * ac00 - d7af (hangul Syllables) |
michael@0 | 68 | * 1100 - 11ff (jamo) |
michael@0 | 69 | * 3130 - 318f (hangul compatibility jamo) |
michael@0 | 70 | * ja |
michael@0 | 71 | * 3040 - 309f (hiragana) |
michael@0 | 72 | * 30a0 - 30ff (katakana) |
michael@0 | 73 | * zh-CN |
michael@0 | 74 | * zh-TW |
michael@0 | 75 | * |
michael@0 | 76 | * CJK |
michael@0 | 77 | * 3100 - 312f (bopomofo) |
michael@0 | 78 | * 31a0 - 31bf (bopomofo extended) |
michael@0 | 79 | * 3000 - 303f (CJK Symbols and Punctuation) |
michael@0 | 80 | * 2e80 - 2eff (CJK radicals supplement) |
michael@0 | 81 | * 2f00 - 2fdf (Kangxi Radicals) |
michael@0 | 82 | * 2ff0 - 2fff (Ideographic Description Characters) |
michael@0 | 83 | * 3190 - 319f (kanbun) |
michael@0 | 84 | * 3200 - 32ff (Enclosed CJK letters and Months) |
michael@0 | 85 | * 3300 - 33ff (CJK compatibility) |
michael@0 | 86 | * 3400 - 4dbf (CJK Unified Ideographs Extension A) |
michael@0 | 87 | * 4e00 - 9faf (CJK Unified Ideographs) |
michael@0 | 88 | * f900 - fa5f (CJK Compatibility Ideographs) |
michael@0 | 89 | * fe30 - fe4f (CJK compatibility Forms) |
michael@0 | 90 | * ff00 - ffef (halfwidth and fullwidth forms) |
michael@0 | 91 | * |
michael@0 | 92 | * Armenian |
michael@0 | 93 | * 0530 - 058f |
michael@0 | 94 | * Syriac |
michael@0 | 95 | * 0700 - 074f |
michael@0 | 96 | * Thaana |
michael@0 | 97 | * 0780 - 07bf |
michael@0 | 98 | * Devanagari |
michael@0 | 99 | * 0900 - 097f |
michael@0 | 100 | * Bengali |
michael@0 | 101 | * 0980 - 09ff |
michael@0 | 102 | * Gurmukhi |
michael@0 | 103 | * 0a00 - 0a7f |
michael@0 | 104 | * Gujarati |
michael@0 | 105 | * 0a80 - 0aff |
michael@0 | 106 | * Oriya |
michael@0 | 107 | * 0b00 - 0b7f |
michael@0 | 108 | * Tamil |
michael@0 | 109 | * 0b80 - 0bff |
michael@0 | 110 | * Telugu |
michael@0 | 111 | * 0c00 - 0c7f |
michael@0 | 112 | * Kannada |
michael@0 | 113 | * 0c80 - 0cff |
michael@0 | 114 | * Malayalam |
michael@0 | 115 | * 0d00 - 0d7f |
michael@0 | 116 | * Sinhala |
michael@0 | 117 | * 0d80 - 0def |
michael@0 | 118 | * Lao |
michael@0 | 119 | * 0e80 - 0eff |
michael@0 | 120 | * Tibetan |
michael@0 | 121 | * 0f00 - 0fbf |
michael@0 | 122 | * Myanmar |
michael@0 | 123 | * 1000 - 109f |
michael@0 | 124 | * Georgian |
michael@0 | 125 | * 10a0 - 10ff |
michael@0 | 126 | * Ethiopic |
michael@0 | 127 | * 1200 - 137f |
michael@0 | 128 | * Cherokee |
michael@0 | 129 | * 13a0 - 13ff |
michael@0 | 130 | * Canadian Aboriginal Syllabics |
michael@0 | 131 | * 1400 - 167f |
michael@0 | 132 | * Ogham |
michael@0 | 133 | * 1680 - 169f |
michael@0 | 134 | * Runic |
michael@0 | 135 | * 16a0 - 16ff |
michael@0 | 136 | * Khmer |
michael@0 | 137 | * 1780 - 17ff |
michael@0 | 138 | * Mongolian |
michael@0 | 139 | * 1800 - 18af |
michael@0 | 140 | * Misc - superscripts and subscripts |
michael@0 | 141 | * 2070 - 209f |
michael@0 | 142 | * Misc - Combining Diacritical Marks for Symbols |
michael@0 | 143 | * 20d0 - 20ff |
michael@0 | 144 | * Misc - Arrows |
michael@0 | 145 | * 2190 - 21ff |
michael@0 | 146 | * Misc - Mathematical Operators |
michael@0 | 147 | * 2200 - 22ff |
michael@0 | 148 | * Misc - Miscellaneous Technical |
michael@0 | 149 | * 2300 - 23ff |
michael@0 | 150 | * Misc - Control picture |
michael@0 | 151 | * 2400 - 243f |
michael@0 | 152 | * Misc - Optical character recognition |
michael@0 | 153 | * 2440 - 2450 |
michael@0 | 154 | * Misc - Enclose Alphanumerics |
michael@0 | 155 | * 2460 - 24ff |
michael@0 | 156 | * Misc - Box Drawing |
michael@0 | 157 | * 2500 - 257f |
michael@0 | 158 | * Misc - Block Elements |
michael@0 | 159 | * 2580 - 259f |
michael@0 | 160 | * Misc - Geometric Shapes |
michael@0 | 161 | * 25a0 - 25ff |
michael@0 | 162 | * Misc - Miscellaneous Symbols |
michael@0 | 163 | * 2600 - 267f |
michael@0 | 164 | * Misc - Dingbats |
michael@0 | 165 | * 2700 - 27bf |
michael@0 | 166 | * Misc - Braille Patterns |
michael@0 | 167 | * 2800 - 28ff |
michael@0 | 168 | * Yi Syllables |
michael@0 | 169 | * a000 - a48f |
michael@0 | 170 | * Yi radicals |
michael@0 | 171 | * a490 - a4cf |
michael@0 | 172 | * Alphabetic Presentation Forms |
michael@0 | 173 | * fb00 - fb4f |
michael@0 | 174 | * Misc - Combining half Marks |
michael@0 | 175 | * fe20 - fe2f |
michael@0 | 176 | * Misc - small form variants |
michael@0 | 177 | * fe50 - fe6f |
michael@0 | 178 | * Misc - Specials |
michael@0 | 179 | * fff0 - ffff |
michael@0 | 180 | *********************************************************************/ |
michael@0 | 181 | |
michael@0 | 182 | |
michael@0 | 183 | /* Lookup tables used by FindCharUnicodeRange: subtable 0 is indexed by the top hex digit of a BMP code point, and an entry of kRangeTableBase+N refers to subtable N. */ |
michael@0 | 184 | #define NUM_OF_SUBTABLES 10 |
michael@0 | 185 | #define SUBTABLE_SIZE 16 |
michael@0 | 186 | |
michael@0 | 187 | static const uint8_t gUnicodeSubrangeTable[NUM_OF_SUBTABLES][SUBTABLE_SIZE] = |
michael@0 | 188 | { |
michael@0 | 189 | { // subtable 0: table for X--- (indexed by the first hex digit) |
michael@0 | 190 | kRangeTableBase+1, //u0xxx |
michael@0 | 191 | kRangeTableBase+2, //u1xxx |
michael@0 | 192 | kRangeTableBase+3, //u2xxx |
michael@0 | 193 | kRangeSetCJK, //u3xxx |
michael@0 | 194 | kRangeSetCJK, //u4xxx |
michael@0 | 195 | kRangeSetCJK, //u5xxx |
michael@0 | 196 | kRangeSetCJK, //u6xxx |
michael@0 | 197 | kRangeSetCJK, //u7xxx |
michael@0 | 198 | kRangeSetCJK, //u8xxx |
michael@0 | 199 | kRangeSetCJK, //u9xxx |
michael@0 | 200 | kRangeTableBase+4, //uaxxx |
michael@0 | 201 | kRangeKorean, //ubxxx |
michael@0 | 202 | kRangeKorean, //ucxxx |
michael@0 | 203 | kRangeTableBase+5, //udxxx |
michael@0 | 204 | kRangePrivate, //uexxx |
michael@0 | 205 | kRangeTableBase+6 //ufxxx |
michael@0 | 206 | }, |
michael@0 | 207 | { // subtable 1: table for 0X-- (indexed by the second hex digit) |
michael@0 | 208 | kRangeSetLatin, //u00xx |
michael@0 | 209 | kRangeSetLatin, //u01xx |
michael@0 | 210 | kRangeSetLatin, //u02xx |
michael@0 | 211 | kRangeGreek, //u03xx XXX 0300-036f is in fact kRangeCombiningDiacriticalMarks |
michael@0 | 212 | kRangeCyrillic, //u04xx |
michael@0 | 213 | kRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian |
michael@0 | 214 | kRangeArabic, //u06xx |
michael@0 | 215 | kRangeTertiaryTable, //u07xx |
michael@0 | 216 | kRangeUnassigned, //u08xx |
michael@0 | 217 | kRangeTertiaryTable, //u09xx |
michael@0 | 218 | kRangeTertiaryTable, //u0axx |
michael@0 | 219 | kRangeTertiaryTable, //u0bxx |
michael@0 | 220 | kRangeTertiaryTable, //u0cxx |
michael@0 | 221 | kRangeTertiaryTable, //u0dxx |
michael@0 | 222 | kRangeTertiaryTable, //u0exx |
michael@0 | 223 | kRangeTibetan //u0fxx |
michael@0 | 224 | }, |
michael@0 | 225 | { // subtable 2: table for 1x-- |
michael@0 | 226 | kRangeTertiaryTable, //u10xx |
michael@0 | 227 | kRangeKorean, //u11xx |
michael@0 | 228 | kRangeEthiopic, //u12xx |
michael@0 | 229 | kRangeTertiaryTable, //u13xx |
michael@0 | 230 | kRangeCanadian, //u14xx |
michael@0 | 231 | kRangeCanadian, //u15xx |
michael@0 | 232 | kRangeTertiaryTable, //u16xx |
michael@0 | 233 | kRangeKhmer, //u17xx |
michael@0 | 234 | kRangeMongolian, //u18xx |
michael@0 | 235 | kRangeUnassigned, //u19xx |
michael@0 | 236 | kRangeUnassigned, //u1axx |
michael@0 | 237 | kRangeUnassigned, //u1bxx |
michael@0 | 238 | kRangeUnassigned, //u1cxx |
michael@0 | 239 | kRangeUnassigned, //u1dxx |
michael@0 | 240 | kRangeSetLatin, //u1exx |
michael@0 | 241 | kRangeGreek //u1fxx |
michael@0 | 242 | }, |
michael@0 | 243 | { // subtable 3: table for 2x-- |
michael@0 | 244 | kRangeSetLatin, //u20xx |
michael@0 | 245 | kRangeSetLatin, //u21xx |
michael@0 | 246 | kRangeMathOperators, //u22xx |
michael@0 | 247 | kRangeMiscTechnical, //u23xx |
michael@0 | 248 | kRangeControlOpticalEnclose, //u24xx |
michael@0 | 249 | kRangeBoxBlockGeometrics, //u25xx |
michael@0 | 250 | kRangeMiscSymbols, //u26xx |
michael@0 | 251 | kRangeDingbats, //u27xx |
michael@0 | 252 | kRangeBraillePattern, //u28xx |
michael@0 | 253 | kRangeUnassigned, //u29xx |
michael@0 | 254 | kRangeUnassigned, //u2axx |
michael@0 | 255 | kRangeUnassigned, //u2bxx |
michael@0 | 256 | kRangeUnassigned, //u2cxx |
michael@0 | 257 | kRangeUnassigned, //u2dxx |
michael@0 | 258 | kRangeSetCJK, //u2exx |
michael@0 | 259 | kRangeSetCJK //u2fxx |
michael@0 | 260 | }, |
michael@0 | 261 | { // subtable 4: table for ax-- |
michael@0 | 262 | kRangeYi, //ua0xx |
michael@0 | 263 | kRangeYi, //ua1xx |
michael@0 | 264 | kRangeYi, //ua2xx |
michael@0 | 265 | kRangeYi, //ua3xx |
michael@0 | 266 | kRangeYi, //ua4xx |
michael@0 | 267 | kRangeUnassigned, //ua5xx |
michael@0 | 268 | kRangeUnassigned, //ua6xx |
michael@0 | 269 | kRangeUnassigned, //ua7xx |
michael@0 | 270 | kRangeUnassigned, //ua8xx |
michael@0 | 271 | kRangeUnassigned, //ua9xx |
michael@0 | 272 | kRangeUnassigned, //uaaxx |
michael@0 | 273 | kRangeUnassigned, //uabxx |
michael@0 | 274 | kRangeKorean, //uacxx |
michael@0 | 275 | kRangeKorean, //uadxx |
michael@0 | 276 | kRangeKorean, //uaexx |
michael@0 | 277 | kRangeKorean //uafxx |
michael@0 | 278 | }, |
michael@0 | 279 | { // subtable 5: table for dx-- |
michael@0 | 280 | kRangeKorean, //ud0xx |
michael@0 | 281 | kRangeKorean, //ud1xx |
michael@0 | 282 | kRangeKorean, //ud2xx |
michael@0 | 283 | kRangeKorean, //ud3xx |
michael@0 | 284 | kRangeKorean, //ud4xx |
michael@0 | 285 | kRangeKorean, //ud5xx |
michael@0 | 286 | kRangeKorean, //ud6xx |
michael@0 | 287 | kRangeKorean, //ud7xx |
michael@0 | 288 | kRangeSurrogate, //ud8xx |
michael@0 | 289 | kRangeSurrogate, //ud9xx |
michael@0 | 290 | kRangeSurrogate, //udaxx |
michael@0 | 291 | kRangeSurrogate, //udbxx |
michael@0 | 292 | kRangeSurrogate, //udcxx |
michael@0 | 293 | kRangeSurrogate, //uddxx |
michael@0 | 294 | kRangeSurrogate, //udexx |
michael@0 | 295 | kRangeSurrogate //udfxx |
michael@0 | 296 | }, |
michael@0 | 297 | { // subtable 6: table for fx-- |
michael@0 | 298 | kRangePrivate, //uf0xx |
michael@0 | 299 | kRangePrivate, //uf1xx |
michael@0 | 300 | kRangePrivate, //uf2xx |
michael@0 | 301 | kRangePrivate, //uf3xx |
michael@0 | 302 | kRangePrivate, //uf4xx |
michael@0 | 303 | kRangePrivate, //uf5xx |
michael@0 | 304 | kRangePrivate, //uf6xx |
michael@0 | 305 | kRangePrivate, //uf7xx |
michael@0 | 306 | kRangePrivate, //uf8xx |
michael@0 | 307 | kRangeSetCJK, //uf9xx |
michael@0 | 308 | kRangeSetCJK, //ufaxx |
michael@0 | 309 | kRangeArabic, //ufbxx, includes alphabetic presentation forms |
michael@0 | 310 | kRangeArabic, //ufcxx |
michael@0 | 311 | kRangeArabic, //ufdxx |
michael@0 | 312 | kRangeTableBase+8, //ufexx |
michael@0 | 313 | kRangeTableBase+9 //uffxx, halfwidth and fullwidth forms, includes Specials |
michael@0 | 314 | }, |
michael@0 | 315 | { // subtable 7: table for 0x0500 - 0x05ff (indexed by the third hex digit) |
michael@0 | 316 | kRangeCyrillic, //u050x |
michael@0 | 317 | kRangeCyrillic, //u051x |
michael@0 | 318 | kRangeCyrillic, //u052x |
michael@0 | 319 | kRangeArmenian, //u053x |
michael@0 | 320 | kRangeArmenian, //u054x |
michael@0 | 321 | kRangeArmenian, //u055x |
michael@0 | 322 | kRangeArmenian, //u056x |
michael@0 | 323 | kRangeArmenian, //u057x |
michael@0 | 324 | kRangeArmenian, //u058x |
michael@0 | 325 | kRangeHebrew, //u059x |
michael@0 | 326 | kRangeHebrew, //u05ax |
michael@0 | 327 | kRangeHebrew, //u05bx |
michael@0 | 328 | kRangeHebrew, //u05cx |
michael@0 | 329 | kRangeHebrew, //u05dx |
michael@0 | 330 | kRangeHebrew, //u05ex |
michael@0 | 331 | kRangeHebrew //u05fx |
michael@0 | 332 | }, |
michael@0 | 333 | { // subtable 8: table for 0xfe00 - 0xfeff (indexed by the third hex digit) |
michael@0 | 334 | kRangeSetCJK, //ufe0x |
michael@0 | 335 | kRangeSetCJK, //ufe1x |
michael@0 | 336 | kRangeSetCJK, //ufe2x |
michael@0 | 337 | kRangeSetCJK, //ufe3x |
michael@0 | 338 | kRangeSetCJK, //ufe4x |
michael@0 | 339 | kRangeSetCJK, //ufe5x |
michael@0 | 340 | kRangeSetCJK, //ufe6x |
michael@0 | 341 | kRangeArabic, //ufe7x |
michael@0 | 342 | kRangeArabic, //ufe8x |
michael@0 | 343 | kRangeArabic, //ufe9x |
michael@0 | 344 | kRangeArabic, //ufeax |
michael@0 | 345 | kRangeArabic, //ufebx |
michael@0 | 346 | kRangeArabic, //ufecx |
michael@0 | 347 | kRangeArabic, //ufedx |
michael@0 | 348 | kRangeArabic, //ufeex |
michael@0 | 349 | kRangeArabic //ufefx |
michael@0 | 350 | }, |
michael@0 | 351 | { // subtable 9: table for 0xff00 - 0xffff (indexed by the third hex digit) |
michael@0 | 352 | kRangeSetCJK, //uff0x, fullwidth latin |
michael@0 | 353 | kRangeSetCJK, //uff1x, fullwidth latin |
michael@0 | 354 | kRangeSetCJK, //uff2x, fullwidth latin |
michael@0 | 355 | kRangeSetCJK, //uff3x, fullwidth latin |
michael@0 | 356 | kRangeSetCJK, //uff4x, fullwidth latin |
michael@0 | 357 | kRangeSetCJK, //uff5x, fullwidth latin |
michael@0 | 358 | kRangeSetCJK, //uff6x, halfwidth katakana |
michael@0 | 359 | kRangeSetCJK, //uff7x, halfwidth katakana |
michael@0 | 360 | kRangeSetCJK, //uff8x, halfwidth katakana |
michael@0 | 361 | kRangeSetCJK, //uff9x, halfwidth katakana |
michael@0 | 362 | kRangeSetCJK, //uffax, halfwidth hangul jamo |
michael@0 | 363 | kRangeSetCJK, //uffbx, halfwidth hangul jamo |
michael@0 | 364 | kRangeSetCJK, //uffcx, halfwidth hangul jamo |
michael@0 | 365 | kRangeSetCJK, //uffdx, halfwidth hangul jamo |
michael@0 | 366 | kRangeSetCJK, //uffex, fullwidth symbols |
michael@0 | 367 | kRangeSpecials, //ufffx, Specials |
michael@0 | 368 | }, |
michael@0 | 369 | }; |
michael@0 | 370 | |
michael@0 | 371 | // Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80) |
michael@0 | 372 | // code points, so the number of entries in the tertiary range |
michael@0 | 373 | // table for that range is obtained by dividing (0x1700 - 0x0700) by 128. |
michael@0 | 374 | // Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal |
michael@0 | 375 | // syllabaries take multiple chunks, and Ogham and Runic share a single chunk. |
michael@0 | 376 | #define TERTIARY_TABLE_SIZE ((0x1700 - 0x0700) / 0x80) |
michael@0 | 377 | |
michael@0 | 378 | static const uint8_t gUnicodeTertiaryRangeTable[TERTIARY_TABLE_SIZE] = |
michael@0 | 379 | { //table for 0x0700 - 0x16ff, one entry per block of 0x80 code points |
michael@0 | 380 | kRangeSyriac, //u070x |
michael@0 | 381 | kRangeThaana, //u078x |
michael@0 | 382 | kRangeUnassigned, //u080x placeholder (resolved in the secondary table) |
michael@0 | 383 | kRangeUnassigned, //u088x placeholder (resolved in the secondary table) |
michael@0 | 384 | kRangeDevanagari, //u090x |
michael@0 | 385 | kRangeBengali, //u098x |
michael@0 | 386 | kRangeGurmukhi, //u0a0x |
michael@0 | 387 | kRangeGujarati, //u0a8x |
michael@0 | 388 | kRangeOriya, //u0b0x |
michael@0 | 389 | kRangeTamil, //u0b8x |
michael@0 | 390 | kRangeTelugu, //u0c0x |
michael@0 | 391 | kRangeKannada, //u0c8x |
michael@0 | 392 | kRangeMalayalam, //u0d0x |
michael@0 | 393 | kRangeSinhala, //u0d8x |
michael@0 | 394 | kRangeThai, //u0e0x |
michael@0 | 395 | kRangeLao, //u0e8x |
michael@0 | 396 | kRangeTibetan, //u0f0x placeholder (resolved in the secondary table) |
michael@0 | 397 | kRangeTibetan, //u0f8x placeholder (resolved in the secondary table) |
michael@0 | 398 | kRangeMyanmar, //u100x |
michael@0 | 399 | kRangeGeorgian, //u108x |
michael@0 | 400 | kRangeKorean, //u110x placeholder (resolved in the secondary table) |
michael@0 | 401 | kRangeKorean, //u118x placeholder (resolved in the secondary table) |
michael@0 | 402 | kRangeEthiopic, //u120x placeholder (resolved in the secondary table) |
michael@0 | 403 | kRangeEthiopic, //u128x placeholder (resolved in the secondary table) |
michael@0 | 404 | kRangeEthiopic, //u130x |
michael@0 | 405 | kRangeCherokee, //u138x |
michael@0 | 406 | kRangeCanadian, //u140x placeholder (resolved in the secondary table) |
michael@0 | 407 | kRangeCanadian, //u148x placeholder (resolved in the secondary table) |
michael@0 | 408 | kRangeCanadian, //u150x placeholder (resolved in the secondary table) |
michael@0 | 409 | kRangeCanadian, //u158x placeholder (resolved in the secondary table) |
michael@0 | 410 | kRangeCanadian, //u160x |
michael@0 | 411 | kRangeOghamRunic //u168x this contains two scripts, Ogham & Runic |
michael@0 | 412 | }; |
michael@0 | 413 | |
michael@0 | 414 | // Returns the unicode range value for the code point ch. |
michael@0 | 415 | // A two level index is almost enough for locating a range, with the |
michael@0 | 416 | // exception of u03xx and u05xx. Since we don't really care about a range for |
michael@0 | 417 | // combining diacritical marks in our font application, they are not |
michael@0 | 418 | // discriminated further. Future adoption of this module for other uses |
michael@0 | 419 | // should be aware of this limitation; the implementation can be extended. |
michael@0 | 420 | // For Indic, Southeast Asian scripts and some other scripts between |
michael@0 | 421 | // U+0700 and U+16FF, it's extended to the third level. |
michael@0 | 422 | uint32_t FindCharUnicodeRange(uint32_t ch) |
michael@0 | 423 | { |
michael@0 | 424 | uint32_t range; |
michael@0 | 425 | |
michael@0 | 426 | // non-BMP code points get an aggregate range chosen by plane only |
michael@0 | 427 | if (ch > 0xFFFF) { |
michael@0 | 428 | uint32_t p = (ch >> 16); /* plane number */ |
michael@0 | 429 | if (p == 1) { |
michael@0 | 430 | return kRangeSMP; |
michael@0 | 431 | } else if (p == 2) { |
michael@0 | 432 | return kRangeSetCJK; |
michael@0 | 433 | } |
michael@0 | 434 | return kRangeHigherPlanes; |
michael@0 | 435 | } |
michael@0 | 436 | |
michael@0 | 437 | // look up an explicit range for BMP code points: |
michael@0 | 438 | // start with the general table, indexed by the top hex digit |
michael@0 | 439 | range = gUnicodeSubrangeTable[0][ch >> 12]; |
michael@0 | 440 | |
michael@0 | 441 | // if the general entry is already a final range, return that |
michael@0 | 442 | if (range < kRangeTableBase) |
michael@0 | 443 | // values at or above kRangeTableBase refer to a subtable and are resolved below |
michael@0 | 444 | return range; |
michael@0 | 445 | |
michael@0 | 446 | // otherwise, use the referenced subtable, indexed by the second hex digit |
michael@0 | 447 | range = gUnicodeSubrangeTable[range - kRangeTableBase][(ch & 0x0f00) >> 8]; |
michael@0 | 448 | if (range < kRangeTableBase) |
michael@0 | 449 | return range; |
michael@0 | 450 | if (range < kRangeTertiaryTable) |
michael@0 | 451 | return gUnicodeSubrangeTable[range - kRangeTableBase][(ch & 0x00f0) >> 4]; /* another subtable, indexed by the third hex digit */ |
michael@0 | 452 | |
michael@0 | 453 | // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks |
michael@0 | 454 | return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7]; |
michael@0 | 455 | } |
michael@0 | 456 | /* Returns the language-group atom that gUnicodeRangeToLangGroupAtomTable holds for unicodeRange, or nullptr when unicodeRange is not below kRangeSpecificItemNum. */ |
michael@0 | 457 | nsIAtom *LangGroupFromUnicodeRange(uint8_t unicodeRange) |
michael@0 | 458 | { |
michael@0 | 459 | if (kRangeSpecificItemNum > unicodeRange) { |
michael@0 | 460 | nsIAtom **atom = gUnicodeRangeToLangGroupAtomTable[unicodeRange]; /* table stores addresses of atom pointers */ |
michael@0 | 461 | return *atom; |
michael@0 | 462 | } |
michael@0 | 463 | return nullptr; |
michael@0 | 464 | } |