gfx/graphite2/src/inc/locale2lcid.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /*  GRAPHITE2 LICENSING
     3     Copyright 2010, SIL International
     4     All rights reserved.
     6     This library is free software; you can redistribute it and/or modify
     7     it under the terms of the GNU Lesser General Public License as published
     8     by the Free Software Foundation; either version 2.1 of License, or
     9     (at your option) any later version.
    11     This program is distributed in the hope that it will be useful,
    12     but WITHOUT ANY WARRANTY; without even the implied warranty of
    13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    14     Lesser General Public License for more details.
    16     You should also have received a copy of the GNU Lesser General Public
    17     License along with this library in the file named "LICENSE".
    18     If not, write to the Free Software Foundation, 51 Franklin Street,
    19     Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
    20     internet at http://www.fsf.org/licenses/lgpl.html.
    22 Alternatively, the contents of this file may be used under the terms of the
    23 Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
    24 License, as published by the Free Software Foundation, either version 2
    25 of the License or (at your option) any later version.
    26 */
    27 #pragma once
    28 #include <cstring>
    29 #include <cassert>
    31 #include "inc/Main.h"
    34 namespace graphite2 {
    // One row of the language table: a Windows language ID together with the
    // language and country codes it stands for.
    struct IsoLangEntry
    {
        // Windows language identifier (LCID), e.g. 0x0409.
        unsigned short mnLang;

        // Language code of up to three characters plus the terminating NUL.
        const char maLangStr[4];

        // Country code of up to two characters plus the terminating NUL;
        // may be the empty string.
        const char maCountry[3];
    };
    43 // Windows Language ID, Locale ISO-639 language, country code as used in
    44 // naming table of OpenType fonts
    45 const IsoLangEntry LANG_ENTRIES[] = {
    46     { 0x0401, "ar","SA" }, // Arabic Saudi Arabia
    47     { 0x0402, "bg","BG" }, // Bulgarian Bulgaria
    48     { 0x0403, "ca","ES" }, // Catalan Catalan
    49     { 0x0404, "zh","TW" }, // Chinese Taiwan
    50     { 0x0405, "cs","CZ" }, // Czech Czech Republic
    51     { 0x0406, "da","DK" }, // Danish Denmark
    52     { 0x0407, "de","DE" }, // German Germany
    53     { 0x0408, "el","GR" }, // Greek Greece
    54     { 0x0409, "en","US" }, // English United States
    55     { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain
    56     { 0x040B, "fi","FI" }, // Finnish Finland
    57     { 0x040C, "fr","FR" }, // French France
    58     { 0x040D, "he","IL" }, // Hebrew Israel
    59     { 0x040E, "hu","HU" }, // Hungarian Hungary
    60     { 0x040F, "is","IS" }, // Icelandic Iceland
    61     { 0x0410, "it","IT" }, // Italian Italy
    62     { 0x0411, "jp","JP" }, // Japanese Japan
    63     { 0x0412, "ko","KR" }, // Korean Korea
    64     { 0x0413, "nl","NL" }, // Dutch Netherlands
    65     { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway
    66     { 0x0415, "pl","PL" }, // Polish Poland
    67     { 0x0416, "pt","BR" }, // Portuguese Brazil
    68     { 0x0417, "rm","CH" }, // Romansh Switzerland
    69     { 0x0418, "ro","RO" }, // Romanian Romania
    70     { 0x0419, "ru","RU" }, // Russian Russia
    71     { 0x041A, "hr","HR" }, // Croatian Croatia
    72     { 0x041B, "sk","SK" }, // Slovak Slovakia
    73     { 0x041C, "sq","AL" }, // Albanian Albania
    74     { 0x041D, "sv","SE" }, // Swedish Sweden
    75     { 0x041E, "th","TH" }, // Thai Thailand
    76     { 0x041F, "tr","TR" }, // Turkish Turkey
    77     { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan
    78     { 0x0421, "id","ID" }, // Indonesian Indonesia
    79     { 0x0422, "uk","UA" }, // Ukrainian Ukraine
    80     { 0x0423, "be","BY" }, // Belarusian Belarus
    81     { 0x0424, "sl","SI" }, // Slovenian Slovenia
    82     { 0x0425, "et","EE" }, // Estonian Estonia
    83     { 0x0426, "lv","LV" }, // Latvian Latvia
    84     { 0x0427, "lt","LT" }, // Lithuanian Lithuania
    85     { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan
    86     { 0x042A, "vi","VN" }, // Vietnamese Vietnam
    87     { 0x042B, "hy","AM" }, // Armenian Armenia
    88     { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan
    89     { 0x042D, "eu","" }, // Basque Basque
    90     { 0x042E, "hsb","DE" }, // Upper Sorbian Germany
    91     { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia
    92     { 0x0432, "tn","ZA" }, // Setswana South Africa
    93     { 0x0434, "xh","ZA" }, // isiXhosa South Africa
    94     { 0x0435, "zu","ZA" }, // isiZulu South Africa
    95     { 0x0436, "af","ZA" }, // Afrikaans South Africa
    96     { 0x0437, "ka","GE" }, // Georgian Georgia
    97     { 0x0438, "fo","FO" }, // Faroese Faroe Islands
    98     { 0x0439, "hi","IN" }, // Hindi India
    99     { 0x043A, "mt","MT" }, // Maltese Malta
   100     { 0x043B, "se","NO" }, // Sami (Northern) Norway
   101     { 0x043E, "ms","MY" }, // Malay Malaysia
   102     { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan
   103     { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan
   104     { 0x0441, "sw","KE" }, // Kiswahili Kenya
   105     { 0x0442, "tk","TM" }, // Turkmen Turkmenistan
   106     { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan
   107     { 0x0444, "tt","RU" }, // Tatar Russia
   108     { 0x0445, "bn","IN" }, // Bengali India
   109     { 0x0446, "pa","IN" }, // Punjabi India
   110     { 0x0447, "gu","IN" }, // Gujarati India
   111     { 0x0448, "or","IN" }, // Oriya India
   112     { 0x0448, "wo","SN" }, // Wolof Senegal
   113     { 0x0449, "ta","IN" }, // Tamil India
   114     { 0x044A, "te","IN" }, // Telugu India
   115     { 0x044B, "kn","IN" }, // Kannada India
   116     { 0x044C, "ml","IN" }, // Malayalam India
   117     { 0x044D, "as","IN" }, // Assamese India
   118     { 0x044E, "mr","IN" }, // Marathi India
   119     { 0x044F, "sa","IN" }, // Sanskrit India
   120     { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia
   121     { 0x0451, "bo","CN" }, // Tibetan PRC
   122     { 0x0452, "cy","GB" }, // Welsh United Kingdom
   123     { 0x0453, "km","KH" }, // Khmer Cambodia
   124     { 0x0454, "lo","LA" }, // Lao Lao P.D.R.
   125     { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore
   126     { 0x0456, "gl","ES" }, // Galician Galician
   127     { 0x0457, "kok","IN" }, // Konkani India
   128     { 0x045A, "syr","TR" }, // Syriac Syria
   129     { 0x045B, "si","LK" }, // Sinhala Sri Lanka
   130     { 0x045D, "iu","CA" }, // Inuktitut Canada
   131     { 0x045E, "am","ET" }, // Amharic Ethiopia
   132     { 0x0461, "ne","NP" }, // Nepali Nepal
   133     { 0x0462, "fy","NL" }, // Frisian Netherlands
   134     { 0x0463, "ps","AF" }, // Pashto Afghanistan
   135     { 0x0464, "fil","PH" }, // Filipino Philippines
   136     { 0x0465, "dv","MV" }, // Divehi Maldives
   137     { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria
   138     { 0x046A, "yo","NG" }, // Yoruba Nigeria
   139     { 0x046B, "qu","BO" }, // Quechua Bolivia
   140     { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa
   141     { 0x046D, "ba","RU" }, // Bashkir Russia
   142     { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg
   143     { 0x046F, "kl","GL" }, // Greenlandic Greenland
   144     { 0x0470, "ig","NG" }, // Igbo Nigeria
   145     { 0x0478, "ii","CN" }, // Yi PRC
   146     { 0x047A, "arn","CL" }, // Mapudungun Chile
   147     { 0x047C, "moh","CA" }, // Mohawk Mohawk
   148     { 0x047E, "br","FR" }, // Breton France
   149     { 0x0480, "ug","CN" }, // Uighur PRC
   150     { 0x0481, "mi","NZ" }, // Maori New Zealand
   151     { 0x0482, "oc","FR" }, // Occitan France
   152     { 0x0483, "co","FR" }, // Corsican France
   153     { 0x0484, "gsw","FR" }, // Alsatian France
   154     { 0x0485, "sah","RU" }, // Yakut Russia
   155     { 0x0486, "qut","GT" }, // K'iche Guatemala
   156     { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda
   157     { 0x048C, "gbz","AF" }, // Dari Afghanistan
   158     { 0x0801, "ar","IQ" }, // Arabic Iraq
   159     { 0x0804, "zn","CH" }, // Chinese People's Republic of China
   160     { 0x0807, "de","CH" }, // German Switzerland
   161     { 0x0809, "en","GB" }, // English United Kingdom
   162     { 0x080A, "es","MX" }, // Spanish Mexico
   163     { 0x080C, "fr","BE" }, // French Belgium
   164     { 0x0810, "it","CH" }, // Italian Switzerland
   165     { 0x0813, "nl","BE" }, // Dutch Belgium
   166     { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway
   167     { 0x0816, "pt","PT" }, // Portuguese Portugal
   168     { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia
   169     { 0x081D, "sv","FI" }, // Sweden Finland
   170     { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan
   171     { 0x082E, "dsb","DE" }, // Lower Sorbian Germany
   172     { 0x083B, "se","SE" }, // Sami (Northern) Sweden
   173     { 0x083C, "ga","IE" }, // Irish Ireland
   174     { 0x083E, "ms","BN" }, // Malay Brunei Darussalam
   175     { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan
   176     { 0x0845, "bn","BD" }, // Bengali Bangladesh
   177     { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China
   178     { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada
   179     { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria
   180     { 0x086B, "es","EC" }, // Quechua Ecuador
   181     { 0x0C01, "ar","EG" }, // Arabic Egypt
   182     { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R.
   183     { 0x0C07, "de","AT" }, // German Austria
   184     { 0x0C09, "en","AU" }, // English Australia
   185     { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain
   186     { 0x0C0C, "fr","CA" }, // French Canada
   187     { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia
   188     { 0x0C3B, "se","FI" }, // Sami (Northern) Finland
   189     { 0x0C6B, "qu","PE" }, // Quechua Peru
   190     { 0x1001, "ar","LY" }, // Arabic Libya
   191     { 0x1004, "zh","SG" }, // Chinese Singapore
   192     { 0x1007, "de","LU" }, // German Luxembourg
   193     { 0x1009, "en","CA" }, // English Canada
   194     { 0x100A, "es","GT" }, // Spanish Guatemala
   195     { 0x100C, "fr","CH" }, // French Switzerland
   196     { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina
   197     { 0x103B, "smj","NO" }, // Sami (Lule) Norway
   198     { 0x1401, "ar","DZ" }, // Arabic Algeria
   199     { 0x1404, "zh","MO" }, // Chinese Macao S.A.R.
   200     { 0x1407, "de","LI" }, // German Liechtenstein
   201     { 0x1409, "en","NZ" }, // English New Zealand
   202     { 0x140A, "es","CR" }, // Spanish Costa Rica
   203     { 0x140C, "fr","LU" }, // French Luxembourg
   204     { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina
   205     { 0x143B, "smj","SE" }, // Sami (Lule) Sweden
   206     { 0x1801, "ar","MA" }, // Arabic Morocco
   207     { 0x1809, "en","IE" }, // English Ireland
   208     { 0x180A, "es","PA" }, // Spanish Panama
   209     { 0x180C, "fr","MC" }, // French Principality of Monoco
   210     { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina
   211     { 0x183B, "sma","NO" }, // Sami (Southern) Norway
   212     { 0x1C01, "ar","TN" }, // Arabic Tunisia
   213     { 0x1C09, "en","ZA" }, // English South Africa
   214     { 0x1C0A, "es","DO" }, // Spanish Dominican Republic
   215     { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina
   216     { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden
   217     { 0x2001, "ar","OM" }, // Arabic Oman
   218     { 0x2009, "en","JM" }, // English Jamaica
   219     { 0x200A, "es","VE" }, // Spanish Venezuela
   220     { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina
   221     { 0x203B, "sms","FI" }, // Sami (Skolt) Finland
   222     { 0x2401, "ar","YE" }, // Arabic Yemen
   223     { 0x2409, "en","BS" }, // English Caribbean
   224     { 0x240A, "es","CO" }, // Spanish Colombia
   225     { 0x243B, "smn","FI" }, // Sami (Inari) Finland
   226     { 0x2801, "ar","SY" }, // Arabic Syria
   227     { 0x2809, "en","BZ" }, // English Belize
   228     { 0x280A, "es","PE" }, // Spanish Peru
   229     { 0x2C01, "ar","JO" }, // Arabic Jordan
   230     { 0x2C09, "en","TT" }, // English Trinidad and Tobago
   231     { 0x2C0A, "es","AR" }, // Spanish Argentina
   232     { 0x3001, "ar","LB" }, // Arabic Lebanon
   233     { 0x3009, "en","ZW" }, // English Zimbabwe
   234     { 0x300A, "es","EC" }, // Spanish Ecuador
   235     { 0x3401, "ar","KW" }, // Arabic Kuwait
   236     { 0x3409, "en","PH" }, // English Republic of the Philippines
   237     { 0x340A, "es","CL" }, // Spanish Chile
   238     { 0x3801, "ar","AE" }, // Arabic U.A.E.
   239     { 0x380A, "es","UY" }, // Spanish Uruguay
   240     { 0x3C01, "ar","BH" }, // Arabic Bahrain
   241     { 0x3C0A, "es","PY" }, // Spanish Paraguay
   242     { 0x4001, "ar","QA" }, // Arabic Qatar
   243     { 0x4009, "en","IN" }, // English India
   244     { 0x400A, "es","BO" }, // Spanish Bolivia
   245     { 0x4409, "en","MY" }, // English Malaysia
   246     { 0x440A, "es","SV" }, // Spanish El Salvador
   247     { 0x4809, "en","SG" }, // English Singapore
   248     { 0x480A, "es","HN" }, // Spanish Honduras
   249     { 0x4C0A, "es","NI" }, // Spanish Nicaragua
   250     { 0x500A, "es","PR" }, // Spanish Puerto Rico
   251     { 0x540A, "es","US" } // Spanish United States
   252 };
   254 class Locale2Lang
   255 {
   256     Locale2Lang(const Locale2Lang &);
   257     Locale2Lang & operator = (const Locale2Lang &);
   259 public:
   260     Locale2Lang() : mSeedPosition(128)
   261     {
   262         memset((void*)mLangLookup, 0, sizeof(mLangLookup));
   263         // create a tri lookup on first 2 letters of language code
   264         static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
   265         for (int i = 0; i < maxIndex; i++)
   266         {
   267             size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a';
   268             size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a';
   269             if (mLangLookup[a][b])
   270             {
   271                 const IsoLangEntry ** old = mLangLookup[a][b];
   272                 int len = 1;
   273                 while (old[len]) len++;
   274                 len += 2;
   275                 mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len);
   276                 if (!mLangLookup[a][b])
   277                 {
   278                     mLangLookup[a][b] = old;
   279                     continue;
   280                 }
   281                 mLangLookup[a][b][--len] = NULL;
   282                 mLangLookup[a][b][--len] = &LANG_ENTRIES[i];
   283                 while (--len >= 0)
   284                 {
   285                     assert(len >= 0);
   286                     mLangLookup[a][b][len] = old[len];
   287                 }
   288                 free(old);
   289             }
   290             else
   291             {
   292                 mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2);
   293                 if (!mLangLookup[a][b]) continue;
   294                 mLangLookup[a][b][1] = NULL;
   295                 mLangLookup[a][b][0] = &LANG_ENTRIES[i];
   296             }
   297         }
   298         while (2 * mSeedPosition < maxIndex)
   299             mSeedPosition *= 2;
   300     };
   301     ~Locale2Lang()
   302     {
   303         for (int i = 0; i != 26; ++i)
   304             for (int j = 0; j != 26; ++j)
   305                 free(mLangLookup[i][j]);
   306     }
   307     unsigned short getMsId(const char * locale) const
   308     {
   309         size_t length = strlen(locale);
   310         size_t langLength = length;
   311         const char * language = locale;
   312         const char * script = NULL;
   313         const char * region = NULL;
   314         size_t regionLength = 0;
   315         const char * dash = strchr(locale, '-');
   316         if (dash && (dash != locale))
   317         {
   318             langLength = (dash - locale);
   319             size_t nextPartLength = length - langLength - 1;
   320             if (nextPartLength >= 2)
   321             {
   322                 script = ++dash;
   323                 dash = strchr(dash, '-');
   324                 if (dash)
   325                 {
   326                     nextPartLength = (dash - script);
   327                     region = ++dash;
   328                 }
   329                 if (nextPartLength == 2 &&
   330                     (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) &&
   331                     (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B))
   332                 {
   333                     region = script;
   334                     regionLength = nextPartLength;
   335                     script = NULL;
   336                 }
   337                 else if (nextPartLength == 4)
   338                 {
   339                     if (dash)
   340                     {
   341                         dash = strchr(dash, '-');
   342                         if (dash)
   343                         {
   344                             nextPartLength = (dash - region);
   345                         }
   346                         else
   347                         {
   348                             nextPartLength = langLength - (region - locale);
   349                         }
   350                         regionLength = nextPartLength;
   351                     }
   352                 }
   353             }
   354         }
   355         size_t a = 'e' - 'a';
   356         size_t b = 'n' - 'a';
   357         unsigned short langId = 0;
   358         int i = 0;
   359         switch (langLength)
   360         {
   361             case 2:
   362             {
   363                 a = language[0] - 'a';
   364                 b = language[1] - 'a';
   365                 if ((a < 26) && (b < 26) && mLangLookup[a][b])
   366                 {
   367                     while (mLangLookup[a][b][i])
   368                     {
   369                         if (mLangLookup[a][b][i]->maLangStr[2] != '\0')
   370                         {
   371                             ++i;
   372                             continue;
   373                         }
   374                         if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
   375                         {
   376                             langId = mLangLookup[a][b][i]->mnLang;
   377                             break;
   378                         }
   379                         else if (langId == 0)
   380                         {
   381                             // possible fallback code
   382                             langId = mLangLookup[a][b][i]->mnLang;
   383                         }
   384                         ++i;
   385                     }
   386                 }
   387             }
   388             break;
   389             case 3:
   390             {
   391                 a = language[0] - 'a';
   392                 b = language[1] - 'a';
   393                 if (mLangLookup[a][b])
   394                 {
   395                     while (mLangLookup[a][b][i])
   396                     {
   397                         if (mLangLookup[a][b][i]->maLangStr[2] != language[2])
   398                         {
   399                             ++i;
   400                             continue;
   401                         }
   402                         if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
   403                         {
   404                             langId = mLangLookup[a][b][i]->mnLang;
   405                             break;
   406                         }
   407                         else if (langId == 0)
   408                         {
   409                             // possible fallback code
   410                             langId = mLangLookup[a][b][i]->mnLang;
   411                         }
   412                         ++i;
   413                     }
   414                 }
   415             }
   416             break;
   417             default:
   418                 break;
   419         }
   420         if (langId == 0) langId = 0x409;
   421         return langId;
   422     }
   423     const IsoLangEntry * findEntryById(unsigned short langId) const
   424     {
   425         static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
   426         int window = mSeedPosition;
   427         int guess = mSeedPosition - 1;
   428         while (LANG_ENTRIES[guess].mnLang != langId)
   429         {
   430             window /= 2;
   431             if (window == 0) return NULL;
   432             guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window;
   433             while (guess >= maxIndex)
   434             {
   435                 window /= 2;
   436                 guess -= window;
   437                 assert(window);
   438             }
   439         }
   440         return &LANG_ENTRIES[guess];
   441     }
   443     CLASS_NEW_DELETE;
   445 private:
   446     const IsoLangEntry ** mLangLookup[26][26];
   447     int mSeedPosition;
   448 };
   450 } // namespace graphite2

mercurial