/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Fixed-width integer types (uint8_t, uint32_t) used by every declaration
// below.
#include <stdint.h>

class nsIAtom;

// The following constants define Unicode subranges.
// Values below kRangeSpecificItemNum must be contiguous so that we can map
// them to a lang group directly.
// NOTE(review): this bound was formerly written as "kRangeNum", which is not
// declared in this file; kRangeSpecificItemNum looks like the intended
// name -- confirm.
// All ranges we care about should be defined under 32; that allows us to
// store a range using the bits of a uint32_t.

// Frequently used range definitions.
const uint8_t   kRangeCyrillic =    0;
const uint8_t   kRangeGreek    =    1;
const uint8_t   kRangeTurkish  =    2;
const uint8_t   kRangeHebrew   =    3;
const uint8_t   kRangeArabic   =    4;
const uint8_t   kRangeBaltic   =    5;
const uint8_t   kRangeThai     =    6;
const uint8_t   kRangeKorean   =    7;
const uint8_t   kRangeJapanese =    8;
const uint8_t   kRangeSChinese =    9;
const uint8_t   kRangeTChinese =   10;
const uint8_t   kRangeDevanagari = 11;
const uint8_t   kRangeTamil    =   12;
const uint8_t   kRangeArmenian =   13;
const uint8_t   kRangeBengali  =   14;
const uint8_t   kRangeCanadian =   15;
const uint8_t   kRangeEthiopic =   16;
const uint8_t   kRangeGeorgian =   17;
const uint8_t   kRangeGujarati =   18;
const uint8_t   kRangeGurmukhi =   19;
const uint8_t   kRangeKhmer    =   20;
const uint8_t   kRangeMalayalam =  21;
const uint8_t   kRangeOriya    =   22;
const uint8_t   kRangeTelugu   =   23;
const uint8_t   kRangeKannada  =   24;
const uint8_t   kRangeSinhala  =   25;
const uint8_t   kRangeTibetan  =   26;

// One past the last frequently used range above (those run 0..26).
const uint8_t   kRangeSpecificItemNum = 27;

// Range / range-set definitions may grow into this gap: 27-29.

const uint8_t   kRangeSetStart  =  30;   // range set definitions start here
const uint8_t   kRangeSetLatin  =  30;
const uint8_t   kRangeSetCJK    =  31;
const uint8_t   kRangeSetEnd    =  31;   // range set definitions end here; this
                                         // and smaller ranges are used as bit
                                         // masks, so don't increase this value.

// Less frequently used range definitions.
const uint8_t   kRangeSurrogate            = 32;
const uint8_t   kRangePrivate              = 33;
const uint8_t   kRangeMisc                 = 34;
const uint8_t   kRangeUnassigned           = 35;
const uint8_t   kRangeSyriac               = 36;
const uint8_t   kRangeThaana               = 37;
const uint8_t   kRangeLao                  = 38;
const uint8_t   kRangeMyanmar              = 39;
const uint8_t   kRangeCherokee             = 40;
const uint8_t   kRangeOghamRunic           = 41;
const uint8_t   kRangeMongolian            = 42;
const uint8_t   kRangeMathOperators        = 43;
const uint8_t   kRangeMiscTechnical        = 44;
const uint8_t   kRangeControlOpticalEnclose = 45;
const uint8_t   kRangeBoxBlockGeometrics   = 46;
const uint8_t   kRangeMiscSymbols          = 47;
const uint8_t   kRangeDingbats             = 48;
const uint8_t   kRangeBraillePattern       = 49;
const uint8_t   kRangeYi                   = 50;
const uint8_t   kRangeCombiningDiacriticalMarks = 51;
const uint8_t   kRangeSpecials             = 52;

// Aggregate ranges for non-BMP codepoints (u+2xxxx are all CJK).
const uint8_t   kRangeSMP                  = 53;  // u+1xxxx
const uint8_t   kRangeHigherPlanes         = 54;  // u+3xxxx and above

const uint8_t   kRangeTableBase     = 128;  // values over 127 are reserved for
                                            // internal use only
const uint8_t   kRangeTertiaryTable = 145;  // leave room for 16 subtable
                                            // indices (kRangeTableBase + 1 ..
                                            // kRangeTableBase + 16)

// Presumably returns the kRange* value for the code point |ch|; the
// implementation is not visible from this header -- confirm there.
// NOTE(review): the result type is uint32_t, while the kRange* constants and
// LangGroupFromUnicodeRange's parameter are uint8_t.
uint32_t FindCharUnicodeRange(uint32_t ch);

// Presumably maps a kRange* value to the atom naming its language group;
// the behaviour for values without a lang group is not visible here --
// confirm against the implementation.
nsIAtom* LangGroupFromUnicodeRange(uint8_t unicodeRange);