michael@0: /* GRAPHITE2 LICENSING michael@0: michael@0: Copyright 2010, SIL International michael@0: All rights reserved. michael@0: michael@0: This library is free software; you can redistribute it and/or modify michael@0: it under the terms of the GNU Lesser General Public License as published michael@0: by the Free Software Foundation; either version 2.1 of License, or michael@0: (at your option) any later version. michael@0: michael@0: This program is distributed in the hope that it will be useful, michael@0: but WITHOUT ANY WARRANTY; without even the implied warranty of michael@0: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU michael@0: Lesser General Public License for more details. michael@0: michael@0: You should also have received a copy of the GNU Lesser General Public michael@0: License along with this library in the file named "LICENSE". michael@0: If not, write to the Free Software Foundation, 51 Franklin Street, michael@0: Suite 500, Boston, MA 02110-1335, USA or visit their web page on the michael@0: internet at http://www.fsf.org/licenses/lgpl.html. michael@0: michael@0: Alternatively, the contents of this file may be used under the terms of the michael@0: Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public michael@0: License, as published by the Free Software Foundation, either version 2 michael@0: of the License or (at your option) any later version. michael@0: */ michael@0: #pragma once michael@0: #include michael@0: #include michael@0: michael@0: #include "inc/Main.h" michael@0: michael@0: michael@0: namespace graphite2 { michael@0: michael@0: struct IsoLangEntry michael@0: { michael@0: unsigned short mnLang; michael@0: const char maLangStr[4]; michael@0: const char maCountry[3]; michael@0: }; michael@0: michael@0: // Windows Language ID, Locale ISO-639 language, country code as used in michael@0: // naming table of OpenType fonts michael@0: const IsoLangEntry LANG_ENTRIES[] = { michael@0: { 0x0401, "ar","SA" }, // Arabic Saudi Arabia michael@0: { 0x0402, "bg","BG" }, // Bulgarian Bulgaria michael@0: { 0x0403, "ca","ES" }, // Catalan Catalan michael@0: { 0x0404, "zh","TW" }, // Chinese Taiwan michael@0: { 0x0405, "cs","CZ" }, // Czech Czech Republic michael@0: { 0x0406, "da","DK" }, // Danish Denmark michael@0: { 0x0407, "de","DE" }, // German Germany michael@0: { 0x0408, "el","GR" }, // Greek Greece michael@0: { 0x0409, "en","US" }, // English United States michael@0: { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain michael@0: { 0x040B, "fi","FI" }, // Finnish Finland michael@0: { 0x040C, "fr","FR" }, // French France michael@0: { 0x040D, "he","IL" }, // Hebrew Israel michael@0: { 0x040E, "hu","HU" }, // Hungarian Hungary michael@0: { 0x040F, "is","IS" }, // Icelandic Iceland michael@0: { 0x0410, "it","IT" }, // Italian Italy michael@0: { 0x0411, "jp","JP" }, // Japanese Japan michael@0: { 0x0412, "ko","KR" }, // Korean Korea michael@0: { 0x0413, "nl","NL" }, // Dutch Netherlands michael@0: { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway michael@0: { 0x0415, "pl","PL" }, // Polish Poland michael@0: { 0x0416, "pt","BR" }, // Portuguese Brazil michael@0: { 0x0417, "rm","CH" }, // Romansh Switzerland michael@0: { 0x0418, "ro","RO" }, // Romanian Romania michael@0: { 0x0419, "ru","RU" }, // Russian Russia michael@0: { 0x041A, "hr","HR" }, // Croatian Croatia michael@0: { 0x041B, "sk","SK" }, // Slovak Slovakia michael@0: { 0x041C, "sq","AL" }, // Albanian Albania michael@0: { 0x041D, "sv","SE" }, // Swedish Sweden michael@0: { 0x041E, "th","TH" }, // Thai Thailand michael@0: { 0x041F, "tr","TR" }, // Turkish Turkey michael@0: { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan michael@0: { 0x0421, "id","ID" }, // Indonesian Indonesia michael@0: { 0x0422, "uk","UA" }, // Ukrainian Ukraine michael@0: { 0x0423, "be","BY" }, // Belarusian Belarus michael@0: { 0x0424, "sl","SI" }, // Slovenian Slovenia michael@0: { 0x0425, "et","EE" }, // Estonian Estonia michael@0: { 0x0426, "lv","LV" }, // Latvian Latvia michael@0: { 0x0427, "lt","LT" }, // Lithuanian Lithuania michael@0: { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan michael@0: { 0x042A, "vi","VN" }, // Vietnamese Vietnam michael@0: { 0x042B, "hy","AM" }, // Armenian Armenia michael@0: { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan michael@0: { 0x042D, "eu","" }, // Basque Basque michael@0: { 0x042E, "hsb","DE" }, // Upper Sorbian Germany michael@0: { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia michael@0: { 0x0432, "tn","ZA" }, // Setswana South Africa michael@0: { 0x0434, "xh","ZA" }, // isiXhosa South Africa michael@0: { 0x0435, "zu","ZA" }, // isiZulu South Africa michael@0: { 0x0436, "af","ZA" }, // Afrikaans South Africa michael@0: { 0x0437, "ka","GE" }, // Georgian Georgia michael@0: { 0x0438, "fo","FO" }, // Faroese Faroe Islands michael@0: { 0x0439, "hi","IN" }, // Hindi India michael@0: { 0x043A, "mt","MT" }, // Maltese Malta michael@0: { 0x043B, "se","NO" }, // Sami (Northern) Norway michael@0: { 0x043E, "ms","MY" }, // Malay Malaysia michael@0: { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan michael@0: { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan michael@0: { 0x0441, "sw","KE" }, // Kiswahili Kenya michael@0: { 0x0442, "tk","TM" }, // Turkmen Turkmenistan michael@0: { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan michael@0: { 0x0444, "tt","RU" }, // Tatar Russia michael@0: { 0x0445, "bn","IN" }, // Bengali India michael@0: { 0x0446, "pa","IN" }, // Punjabi India michael@0: { 0x0447, "gu","IN" }, // Gujarati India michael@0: { 0x0448, "or","IN" }, // Oriya India michael@0: { 0x0448, "wo","SN" }, // Wolof Senegal michael@0: { 0x0449, "ta","IN" }, // Tamil India michael@0: { 0x044A, "te","IN" }, // Telugu India michael@0: { 0x044B, "kn","IN" }, // Kannada India michael@0: { 0x044C, "ml","IN" }, // Malayalam India michael@0: { 0x044D, "as","IN" }, // Assamese India michael@0: { 0x044E, "mr","IN" }, // Marathi India michael@0: { 0x044F, "sa","IN" }, // Sanskrit India michael@0: { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia michael@0: { 0x0451, "bo","CN" }, // Tibetan PRC michael@0: { 0x0452, "cy","GB" }, // Welsh United Kingdom michael@0: { 0x0453, "km","KH" }, // Khmer Cambodia michael@0: { 0x0454, "lo","LA" }, // Lao Lao P.D.R. michael@0: { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore michael@0: { 0x0456, "gl","ES" }, // Galician Galician michael@0: { 0x0457, "kok","IN" }, // Konkani India michael@0: { 0x045A, "syr","TR" }, // Syriac Syria michael@0: { 0x045B, "si","LK" }, // Sinhala Sri Lanka michael@0: { 0x045D, "iu","CA" }, // Inuktitut Canada michael@0: { 0x045E, "am","ET" }, // Amharic Ethiopia michael@0: { 0x0461, "ne","NP" }, // Nepali Nepal michael@0: { 0x0462, "fy","NL" }, // Frisian Netherlands michael@0: { 0x0463, "ps","AF" }, // Pashto Afghanistan michael@0: { 0x0464, "fil","PH" }, // Filipino Philippines michael@0: { 0x0465, "dv","MV" }, // Divehi Maldives michael@0: { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria michael@0: { 0x046A, "yo","NG" }, // Yoruba Nigeria michael@0: { 0x046B, "qu","BO" }, // Quechua Bolivia michael@0: { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa michael@0: { 0x046D, "ba","RU" }, // Bashkir Russia michael@0: { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg michael@0: { 0x046F, "kl","GL" }, // Greenlandic Greenland michael@0: { 0x0470, "ig","NG" }, // Igbo Nigeria michael@0: { 0x0478, "ii","CN" }, // Yi PRC michael@0: { 0x047A, "arn","CL" }, // Mapudungun Chile michael@0: { 0x047C, "moh","CA" }, // Mohawk Mohawk michael@0: { 0x047E, "br","FR" }, // Breton France michael@0: { 0x0480, "ug","CN" }, // Uighur PRC michael@0: { 0x0481, "mi","NZ" }, // Maori New Zealand michael@0: { 0x0482, "oc","FR" }, // Occitan France michael@0: { 0x0483, "co","FR" }, // Corsican France michael@0: { 0x0484, "gsw","FR" }, // Alsatian France michael@0: { 0x0485, "sah","RU" }, // Yakut Russia michael@0: { 0x0486, "qut","GT" }, // K'iche Guatemala michael@0: { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda michael@0: { 0x048C, "gbz","AF" }, // Dari Afghanistan michael@0: { 0x0801, "ar","IQ" }, // Arabic Iraq michael@0: { 0x0804, "zn","CH" }, // Chinese People's Republic of China michael@0: { 0x0807, "de","CH" }, // German Switzerland michael@0: { 0x0809, "en","GB" }, // English United Kingdom michael@0: { 0x080A, "es","MX" }, // Spanish Mexico michael@0: { 0x080C, "fr","BE" }, // French Belgium michael@0: { 0x0810, "it","CH" }, // Italian Switzerland michael@0: { 0x0813, "nl","BE" }, // Dutch Belgium michael@0: { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway michael@0: { 0x0816, "pt","PT" }, // Portuguese Portugal michael@0: { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia michael@0: { 0x081D, "sv","FI" }, // Sweden Finland michael@0: { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan michael@0: { 0x082E, "dsb","DE" }, // Lower Sorbian Germany michael@0: { 0x083B, "se","SE" }, // Sami (Northern) Sweden michael@0: { 0x083C, "ga","IE" }, // Irish Ireland michael@0: { 0x083E, "ms","BN" }, // Malay Brunei Darussalam michael@0: { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan michael@0: { 0x0845, "bn","BD" }, // Bengali Bangladesh michael@0: { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China michael@0: { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada michael@0: { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria michael@0: { 0x086B, "es","EC" }, // Quechua Ecuador michael@0: { 0x0C01, "ar","EG" }, // Arabic Egypt michael@0: { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R. michael@0: { 0x0C07, "de","AT" }, // German Austria michael@0: { 0x0C09, "en","AU" }, // English Australia michael@0: { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain michael@0: { 0x0C0C, "fr","CA" }, // French Canada michael@0: { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia michael@0: { 0x0C3B, "se","FI" }, // Sami (Northern) Finland michael@0: { 0x0C6B, "qu","PE" }, // Quechua Peru michael@0: { 0x1001, "ar","LY" }, // Arabic Libya michael@0: { 0x1004, "zh","SG" }, // Chinese Singapore michael@0: { 0x1007, "de","LU" }, // German Luxembourg michael@0: { 0x1009, "en","CA" }, // English Canada michael@0: { 0x100A, "es","GT" }, // Spanish Guatemala michael@0: { 0x100C, "fr","CH" }, // French Switzerland michael@0: { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina michael@0: { 0x103B, "smj","NO" }, // Sami (Lule) Norway michael@0: { 0x1401, "ar","DZ" }, // Arabic Algeria michael@0: { 0x1404, "zh","MO" }, // Chinese Macao S.A.R. michael@0: { 0x1407, "de","LI" }, // German Liechtenstein michael@0: { 0x1409, "en","NZ" }, // English New Zealand michael@0: { 0x140A, "es","CR" }, // Spanish Costa Rica michael@0: { 0x140C, "fr","LU" }, // French Luxembourg michael@0: { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina michael@0: { 0x143B, "smj","SE" }, // Sami (Lule) Sweden michael@0: { 0x1801, "ar","MA" }, // Arabic Morocco michael@0: { 0x1809, "en","IE" }, // English Ireland michael@0: { 0x180A, "es","PA" }, // Spanish Panama michael@0: { 0x180C, "fr","MC" }, // French Principality of Monoco michael@0: { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina michael@0: { 0x183B, "sma","NO" }, // Sami (Southern) Norway michael@0: { 0x1C01, "ar","TN" }, // Arabic Tunisia michael@0: { 0x1C09, "en","ZA" }, // English South Africa michael@0: { 0x1C0A, "es","DO" }, // Spanish Dominican Republic michael@0: { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina michael@0: { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden michael@0: { 0x2001, "ar","OM" }, // Arabic Oman michael@0: { 0x2009, "en","JM" }, // English Jamaica michael@0: { 0x200A, "es","VE" }, // Spanish Venezuela michael@0: { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina michael@0: { 0x203B, "sms","FI" }, // Sami (Skolt) Finland michael@0: { 0x2401, "ar","YE" }, // Arabic Yemen michael@0: { 0x2409, "en","BS" }, // English Caribbean michael@0: { 0x240A, "es","CO" }, // Spanish Colombia michael@0: { 0x243B, "smn","FI" }, // Sami (Inari) Finland michael@0: { 0x2801, "ar","SY" }, // Arabic Syria michael@0: { 0x2809, "en","BZ" }, // English Belize michael@0: { 0x280A, "es","PE" }, // Spanish Peru michael@0: { 0x2C01, "ar","JO" }, // Arabic Jordan michael@0: { 0x2C09, "en","TT" }, // English Trinidad and Tobago michael@0: { 0x2C0A, "es","AR" }, // Spanish Argentina michael@0: { 0x3001, "ar","LB" }, // Arabic Lebanon michael@0: { 0x3009, "en","ZW" }, // English Zimbabwe michael@0: { 0x300A, "es","EC" }, // Spanish Ecuador michael@0: { 0x3401, "ar","KW" }, // Arabic Kuwait michael@0: { 0x3409, "en","PH" }, // English Republic of the Philippines michael@0: { 0x340A, "es","CL" }, // Spanish Chile michael@0: { 0x3801, "ar","AE" }, // Arabic U.A.E. michael@0: { 0x380A, "es","UY" }, // Spanish Uruguay michael@0: { 0x3C01, "ar","BH" }, // Arabic Bahrain michael@0: { 0x3C0A, "es","PY" }, // Spanish Paraguay michael@0: { 0x4001, "ar","QA" }, // Arabic Qatar michael@0: { 0x4009, "en","IN" }, // English India michael@0: { 0x400A, "es","BO" }, // Spanish Bolivia michael@0: { 0x4409, "en","MY" }, // English Malaysia michael@0: { 0x440A, "es","SV" }, // Spanish El Salvador michael@0: { 0x4809, "en","SG" }, // English Singapore michael@0: { 0x480A, "es","HN" }, // Spanish Honduras michael@0: { 0x4C0A, "es","NI" }, // Spanish Nicaragua michael@0: { 0x500A, "es","PR" }, // Spanish Puerto Rico michael@0: { 0x540A, "es","US" } // Spanish United States michael@0: }; michael@0: michael@0: class Locale2Lang michael@0: { michael@0: Locale2Lang(const Locale2Lang &); michael@0: Locale2Lang & operator = (const Locale2Lang &); michael@0: michael@0: public: michael@0: Locale2Lang() : mSeedPosition(128) michael@0: { michael@0: memset((void*)mLangLookup, 0, sizeof(mLangLookup)); michael@0: // create a tri lookup on first 2 letters of language code michael@0: static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry); michael@0: for (int i = 0; i < maxIndex; i++) michael@0: { michael@0: size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a'; michael@0: size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a'; michael@0: if (mLangLookup[a][b]) michael@0: { michael@0: const IsoLangEntry ** old = mLangLookup[a][b]; michael@0: int len = 1; michael@0: while (old[len]) len++; michael@0: len += 2; michael@0: mLangLookup[a][b] = gralloc(len); michael@0: if (!mLangLookup[a][b]) michael@0: { michael@0: mLangLookup[a][b] = old; michael@0: continue; michael@0: } michael@0: mLangLookup[a][b][--len] = NULL; michael@0: mLangLookup[a][b][--len] = &LANG_ENTRIES[i]; michael@0: while (--len >= 0) michael@0: { michael@0: assert(len >= 0); michael@0: mLangLookup[a][b][len] = old[len]; michael@0: } michael@0: free(old); michael@0: } michael@0: else michael@0: { michael@0: mLangLookup[a][b] = gralloc(2); michael@0: if (!mLangLookup[a][b]) continue; michael@0: mLangLookup[a][b][1] = NULL; michael@0: mLangLookup[a][b][0] = &LANG_ENTRIES[i]; michael@0: } michael@0: } michael@0: while (2 * mSeedPosition < maxIndex) michael@0: mSeedPosition *= 2; michael@0: }; michael@0: ~Locale2Lang() michael@0: { michael@0: for (int i = 0; i != 26; ++i) michael@0: for (int j = 0; j != 26; ++j) michael@0: free(mLangLookup[i][j]); michael@0: } michael@0: unsigned short getMsId(const char * locale) const michael@0: { michael@0: size_t length = strlen(locale); michael@0: size_t langLength = length; michael@0: const char * language = locale; michael@0: const char * script = NULL; michael@0: const char * region = NULL; michael@0: size_t regionLength = 0; michael@0: const char * dash = strchr(locale, '-'); michael@0: if (dash && (dash != locale)) michael@0: { michael@0: langLength = (dash - locale); michael@0: size_t nextPartLength = length - langLength - 1; michael@0: if (nextPartLength >= 2) michael@0: { michael@0: script = ++dash; michael@0: dash = strchr(dash, '-'); michael@0: if (dash) michael@0: { michael@0: nextPartLength = (dash - script); michael@0: region = ++dash; michael@0: } michael@0: if (nextPartLength == 2 && michael@0: (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) && michael@0: (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B)) michael@0: { michael@0: region = script; michael@0: regionLength = nextPartLength; michael@0: script = NULL; michael@0: } michael@0: else if (nextPartLength == 4) michael@0: { michael@0: if (dash) michael@0: { michael@0: dash = strchr(dash, '-'); michael@0: if (dash) michael@0: { michael@0: nextPartLength = (dash - region); michael@0: } michael@0: else michael@0: { michael@0: nextPartLength = langLength - (region - locale); michael@0: } michael@0: regionLength = nextPartLength; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: size_t a = 'e' - 'a'; michael@0: size_t b = 'n' - 'a'; michael@0: unsigned short langId = 0; michael@0: int i = 0; michael@0: switch (langLength) michael@0: { michael@0: case 2: michael@0: { michael@0: a = language[0] - 'a'; michael@0: b = language[1] - 'a'; michael@0: if ((a < 26) && (b < 26) && mLangLookup[a][b]) michael@0: { michael@0: while (mLangLookup[a][b][i]) michael@0: { michael@0: if (mLangLookup[a][b][i]->maLangStr[2] != '\0') michael@0: { michael@0: ++i; michael@0: continue; michael@0: } michael@0: if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0)) michael@0: { michael@0: langId = mLangLookup[a][b][i]->mnLang; michael@0: break; michael@0: } michael@0: else if (langId == 0) michael@0: { michael@0: // possible fallback code michael@0: langId = mLangLookup[a][b][i]->mnLang; michael@0: } michael@0: ++i; michael@0: } michael@0: } michael@0: } michael@0: break; michael@0: case 3: michael@0: { michael@0: a = language[0] - 'a'; michael@0: b = language[1] - 'a'; michael@0: if (mLangLookup[a][b]) michael@0: { michael@0: while (mLangLookup[a][b][i]) michael@0: { michael@0: if (mLangLookup[a][b][i]->maLangStr[2] != language[2]) michael@0: { michael@0: ++i; michael@0: continue; michael@0: } michael@0: if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0)) michael@0: { michael@0: langId = mLangLookup[a][b][i]->mnLang; michael@0: break; michael@0: } michael@0: else if (langId == 0) michael@0: { michael@0: // possible fallback code michael@0: langId = mLangLookup[a][b][i]->mnLang; michael@0: } michael@0: ++i; michael@0: } michael@0: } michael@0: } michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: if (langId == 0) langId = 0x409; michael@0: return langId; michael@0: } michael@0: const IsoLangEntry * findEntryById(unsigned short langId) const michael@0: { michael@0: static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry); michael@0: int window = mSeedPosition; michael@0: int guess = mSeedPosition - 1; michael@0: while (LANG_ENTRIES[guess].mnLang != langId) michael@0: { michael@0: window /= 2; michael@0: if (window == 0) return NULL; michael@0: guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window; michael@0: while (guess >= maxIndex) michael@0: { michael@0: window /= 2; michael@0: guess -= window; michael@0: assert(window); michael@0: } michael@0: } michael@0: return &LANG_ENTRIES[guess]; michael@0: } michael@0: michael@0: CLASS_NEW_DELETE; michael@0: michael@0: private: michael@0: const IsoLangEntry ** mLangLookup[26][26]; michael@0: int mSeedPosition; michael@0: }; michael@0: michael@0: } // namespace graphite2