Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include <stdint.h> |
michael@0 | 7 | |
michael@0 | 8 | class nsIAtom; |
michael@0 | 9 | |
michael@0 | 10 | // The following constants define Unicode subranges. |
michael@0 | 11 | // Values below kRangeSpecificItemNum must be contiguous so that we can map |
michael@0 | 12 | // them to a lang group directly. |
michael@0 | 13 | // All ranges we care about should be defined under 32, which allows |
michael@0 | 14 | // us to store ranges using the bits of a uint32_t. |
michael@0 | 15 | |
michael@0 | 16 | // Frequently used range definitions (contiguous values starting at 0). |
michael@0 | 17 | const uint8_t kRangeCyrillic = 0; |
michael@0 | 18 | const uint8_t kRangeGreek = 1; |
michael@0 | 19 | const uint8_t kRangeTurkish = 2; |
michael@0 | 20 | const uint8_t kRangeHebrew = 3; |
michael@0 | 21 | const uint8_t kRangeArabic = 4; |
michael@0 | 22 | const uint8_t kRangeBaltic = 5; |
michael@0 | 23 | const uint8_t kRangeThai = 6; |
michael@0 | 24 | const uint8_t kRangeKorean = 7; |
michael@0 | 25 | const uint8_t kRangeJapanese = 8; |
michael@0 | 26 | const uint8_t kRangeSChinese = 9; |
michael@0 | 27 | const uint8_t kRangeTChinese = 10; |
michael@0 | 28 | const uint8_t kRangeDevanagari = 11; |
michael@0 | 29 | const uint8_t kRangeTamil = 12; |
michael@0 | 30 | const uint8_t kRangeArmenian = 13; |
michael@0 | 31 | const uint8_t kRangeBengali = 14; |
michael@0 | 32 | const uint8_t kRangeCanadian = 15; |
michael@0 | 33 | const uint8_t kRangeEthiopic = 16; |
michael@0 | 34 | const uint8_t kRangeGeorgian = 17; |
michael@0 | 35 | const uint8_t kRangeGujarati = 18; |
michael@0 | 36 | const uint8_t kRangeGurmukhi = 19; |
michael@0 | 37 | const uint8_t kRangeKhmer = 20; |
michael@0 | 38 | const uint8_t kRangeMalayalam = 21; |
michael@0 | 39 | const uint8_t kRangeOriya = 22; |
michael@0 | 40 | const uint8_t kRangeTelugu = 23; |
michael@0 | 41 | const uint8_t kRangeKannada = 24; |
michael@0 | 42 | const uint8_t kRangeSinhala = 25; |
michael@0 | 43 | const uint8_t kRangeTibetan = 26; |
michael@0 | 44 | |
michael@0 | 45 | const uint8_t kRangeSpecificItemNum = 27; /* number of range values defined above (0..26) */ |
michael@0 | 46 | |
michael@0 | 47 | // Values 27-29 are left free so that the ranges above or the range sets below can grow into them. |
michael@0 | 48 | |
michael@0 | 49 | const uint8_t kRangeSetStart = 30; // range set definitions start here |
michael@0 | 50 | const uint8_t kRangeSetLatin = 30; |
michael@0 | 51 | const uint8_t kRangeSetCJK = 31; |
michael@0 | 52 | const uint8_t kRangeSetEnd = 31; // range set definitions end here; this |
michael@0 | 53 | // and all smaller values are used as bits |
michael@0 | 54 | // of a mask, so don't increase this value. |
michael@0 | 55 | |
michael@0 | 56 | // Less frequently used range definitions (values above kRangeSetEnd). |
michael@0 | 57 | const uint8_t kRangeSurrogate = 32; |
michael@0 | 58 | const uint8_t kRangePrivate = 33; |
michael@0 | 59 | const uint8_t kRangeMisc = 34; |
michael@0 | 60 | const uint8_t kRangeUnassigned = 35; |
michael@0 | 61 | const uint8_t kRangeSyriac = 36; |
michael@0 | 62 | const uint8_t kRangeThaana = 37; |
michael@0 | 63 | const uint8_t kRangeLao = 38; |
michael@0 | 64 | const uint8_t kRangeMyanmar = 39; |
michael@0 | 65 | const uint8_t kRangeCherokee = 40; |
michael@0 | 66 | const uint8_t kRangeOghamRunic = 41; |
michael@0 | 67 | const uint8_t kRangeMongolian = 42; |
michael@0 | 68 | const uint8_t kRangeMathOperators = 43; |
michael@0 | 69 | const uint8_t kRangeMiscTechnical = 44; |
michael@0 | 70 | const uint8_t kRangeControlOpticalEnclose = 45; |
michael@0 | 71 | const uint8_t kRangeBoxBlockGeometrics = 46; |
michael@0 | 72 | const uint8_t kRangeMiscSymbols = 47; |
michael@0 | 73 | const uint8_t kRangeDingbats = 48; |
michael@0 | 74 | const uint8_t kRangeBraillePattern = 49; |
michael@0 | 75 | const uint8_t kRangeYi = 50; |
michael@0 | 76 | const uint8_t kRangeCombiningDiacriticalMarks = 51; |
michael@0 | 77 | const uint8_t kRangeSpecials = 52; |
michael@0 | 78 | |
michael@0 | 79 | // Aggregate ranges for non-BMP code points (U+2xxxx code points are all CJK). |
michael@0 | 80 | const uint8_t kRangeSMP = 53; // U+1xxxx |
michael@0 | 81 | const uint8_t kRangeHigherPlanes = 54; // U+3xxxx and above |
michael@0 | 82 | |
michael@0 | 83 | const uint8_t kRangeTableBase = 128; // values over 127 are reserved for internal use only |
michael@0 | 84 | const uint8_t kRangeTertiaryTable = 145; // leaves room for 16 subtable |
michael@0 | 85 | // indices (kRangeTableBase + 1 .. |
michael@0 | 86 | // kRangeTableBase + 16) |
michael@0 | 87 | |
michael@0 | 88 | |
michael@0 | 89 | |
michael@0 | 90 | uint32_t FindCharUnicodeRange(uint32_t ch); /* presumably returns one of the kRange* values for code point ch; defined elsewhere -- TODO confirm */ |
michael@0 | 91 | nsIAtom* LangGroupFromUnicodeRange(uint8_t unicodeRange); /* presumably takes a kRange* value; NOTE(review): parameter is uint8_t while FindCharUnicodeRange returns uint32_t */ |