Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* GRAPHITE2 LICENSING
3 Copyright 2010, SIL International
4 All rights reserved.
6 This library is free software; you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published
8 by the Free Software Foundation; either version 2.1 of License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should also have received a copy of the GNU Lesser General Public
17 License along with this library in the file named "LICENSE".
18 If not, write to the Free Software Foundation, 51 Franklin Street,
19 Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
20 internet at http://www.fsf.org/licenses/lgpl.html.
22 Alternatively, the contents of this file may be used under the terms of the
23 Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
24 License, as published by the Free Software Foundation, either version 2
25 of the License or (at your option) any later version.
26 */
27 #pragma once
28 #include <cstring>
29 #include <cassert>
31 #include "inc/Main.h"
34 namespace graphite2 {
// One row of the language table: a Windows language ID paired with the ISO
// language and country codes it corresponds to.  Field order matters because
// the table is written with positional aggregate initialisers.
struct IsoLangEntry
{
    unsigned short mnLang;       // Windows language ID, e.g. 0x0409
    const char maLangStr[4];     // NUL-terminated language code (2 or 3 letters)
    const char maCountry[3];     // NUL-terminated 2-letter country code; may be empty
};
43 // Windows Language ID, Locale ISO-639 language, country code as used in
44 // naming table of OpenType fonts
45 const IsoLangEntry LANG_ENTRIES[] = {
46 { 0x0401, "ar","SA" }, // Arabic Saudi Arabia
47 { 0x0402, "bg","BG" }, // Bulgarian Bulgaria
48 { 0x0403, "ca","ES" }, // Catalan Catalan
49 { 0x0404, "zh","TW" }, // Chinese Taiwan
50 { 0x0405, "cs","CZ" }, // Czech Czech Republic
51 { 0x0406, "da","DK" }, // Danish Denmark
52 { 0x0407, "de","DE" }, // German Germany
53 { 0x0408, "el","GR" }, // Greek Greece
54 { 0x0409, "en","US" }, // English United States
55 { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain
56 { 0x040B, "fi","FI" }, // Finnish Finland
57 { 0x040C, "fr","FR" }, // French France
58 { 0x040D, "he","IL" }, // Hebrew Israel
59 { 0x040E, "hu","HU" }, // Hungarian Hungary
60 { 0x040F, "is","IS" }, // Icelandic Iceland
61 { 0x0410, "it","IT" }, // Italian Italy
62 { 0x0411, "jp","JP" }, // Japanese Japan
63 { 0x0412, "ko","KR" }, // Korean Korea
64 { 0x0413, "nl","NL" }, // Dutch Netherlands
65 { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway
66 { 0x0415, "pl","PL" }, // Polish Poland
67 { 0x0416, "pt","BR" }, // Portuguese Brazil
68 { 0x0417, "rm","CH" }, // Romansh Switzerland
69 { 0x0418, "ro","RO" }, // Romanian Romania
70 { 0x0419, "ru","RU" }, // Russian Russia
71 { 0x041A, "hr","HR" }, // Croatian Croatia
72 { 0x041B, "sk","SK" }, // Slovak Slovakia
73 { 0x041C, "sq","AL" }, // Albanian Albania
74 { 0x041D, "sv","SE" }, // Swedish Sweden
75 { 0x041E, "th","TH" }, // Thai Thailand
76 { 0x041F, "tr","TR" }, // Turkish Turkey
77 { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan
78 { 0x0421, "id","ID" }, // Indonesian Indonesia
79 { 0x0422, "uk","UA" }, // Ukrainian Ukraine
80 { 0x0423, "be","BY" }, // Belarusian Belarus
81 { 0x0424, "sl","SI" }, // Slovenian Slovenia
82 { 0x0425, "et","EE" }, // Estonian Estonia
83 { 0x0426, "lv","LV" }, // Latvian Latvia
84 { 0x0427, "lt","LT" }, // Lithuanian Lithuania
85 { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan
86 { 0x042A, "vi","VN" }, // Vietnamese Vietnam
87 { 0x042B, "hy","AM" }, // Armenian Armenia
88 { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan
89 { 0x042D, "eu","" }, // Basque Basque
90 { 0x042E, "hsb","DE" }, // Upper Sorbian Germany
91 { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia
92 { 0x0432, "tn","ZA" }, // Setswana South Africa
93 { 0x0434, "xh","ZA" }, // isiXhosa South Africa
94 { 0x0435, "zu","ZA" }, // isiZulu South Africa
95 { 0x0436, "af","ZA" }, // Afrikaans South Africa
96 { 0x0437, "ka","GE" }, // Georgian Georgia
97 { 0x0438, "fo","FO" }, // Faroese Faroe Islands
98 { 0x0439, "hi","IN" }, // Hindi India
99 { 0x043A, "mt","MT" }, // Maltese Malta
100 { 0x043B, "se","NO" }, // Sami (Northern) Norway
101 { 0x043E, "ms","MY" }, // Malay Malaysia
102 { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan
103 { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan
104 { 0x0441, "sw","KE" }, // Kiswahili Kenya
105 { 0x0442, "tk","TM" }, // Turkmen Turkmenistan
106 { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan
107 { 0x0444, "tt","RU" }, // Tatar Russia
108 { 0x0445, "bn","IN" }, // Bengali India
109 { 0x0446, "pa","IN" }, // Punjabi India
110 { 0x0447, "gu","IN" }, // Gujarati India
111 { 0x0448, "or","IN" }, // Oriya India
112 { 0x0448, "wo","SN" }, // Wolof Senegal
113 { 0x0449, "ta","IN" }, // Tamil India
114 { 0x044A, "te","IN" }, // Telugu India
115 { 0x044B, "kn","IN" }, // Kannada India
116 { 0x044C, "ml","IN" }, // Malayalam India
117 { 0x044D, "as","IN" }, // Assamese India
118 { 0x044E, "mr","IN" }, // Marathi India
119 { 0x044F, "sa","IN" }, // Sanskrit India
120 { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia
121 { 0x0451, "bo","CN" }, // Tibetan PRC
122 { 0x0452, "cy","GB" }, // Welsh United Kingdom
123 { 0x0453, "km","KH" }, // Khmer Cambodia
124 { 0x0454, "lo","LA" }, // Lao Lao P.D.R.
125 { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore
126 { 0x0456, "gl","ES" }, // Galician Galician
127 { 0x0457, "kok","IN" }, // Konkani India
128 { 0x045A, "syr","TR" }, // Syriac Syria
129 { 0x045B, "si","LK" }, // Sinhala Sri Lanka
130 { 0x045D, "iu","CA" }, // Inuktitut Canada
131 { 0x045E, "am","ET" }, // Amharic Ethiopia
132 { 0x0461, "ne","NP" }, // Nepali Nepal
133 { 0x0462, "fy","NL" }, // Frisian Netherlands
134 { 0x0463, "ps","AF" }, // Pashto Afghanistan
135 { 0x0464, "fil","PH" }, // Filipino Philippines
136 { 0x0465, "dv","MV" }, // Divehi Maldives
137 { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria
138 { 0x046A, "yo","NG" }, // Yoruba Nigeria
139 { 0x046B, "qu","BO" }, // Quechua Bolivia
140 { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa
141 { 0x046D, "ba","RU" }, // Bashkir Russia
142 { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg
143 { 0x046F, "kl","GL" }, // Greenlandic Greenland
144 { 0x0470, "ig","NG" }, // Igbo Nigeria
145 { 0x0478, "ii","CN" }, // Yi PRC
146 { 0x047A, "arn","CL" }, // Mapudungun Chile
147 { 0x047C, "moh","CA" }, // Mohawk Mohawk
148 { 0x047E, "br","FR" }, // Breton France
149 { 0x0480, "ug","CN" }, // Uighur PRC
150 { 0x0481, "mi","NZ" }, // Maori New Zealand
151 { 0x0482, "oc","FR" }, // Occitan France
152 { 0x0483, "co","FR" }, // Corsican France
153 { 0x0484, "gsw","FR" }, // Alsatian France
154 { 0x0485, "sah","RU" }, // Yakut Russia
155 { 0x0486, "qut","GT" }, // K'iche Guatemala
156 { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda
157 { 0x048C, "gbz","AF" }, // Dari Afghanistan
158 { 0x0801, "ar","IQ" }, // Arabic Iraq
159 { 0x0804, "zn","CH" }, // Chinese People's Republic of China
160 { 0x0807, "de","CH" }, // German Switzerland
161 { 0x0809, "en","GB" }, // English United Kingdom
162 { 0x080A, "es","MX" }, // Spanish Mexico
163 { 0x080C, "fr","BE" }, // French Belgium
164 { 0x0810, "it","CH" }, // Italian Switzerland
165 { 0x0813, "nl","BE" }, // Dutch Belgium
166 { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway
167 { 0x0816, "pt","PT" }, // Portuguese Portugal
168 { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia
169 { 0x081D, "sv","FI" }, // Sweden Finland
170 { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan
171 { 0x082E, "dsb","DE" }, // Lower Sorbian Germany
172 { 0x083B, "se","SE" }, // Sami (Northern) Sweden
173 { 0x083C, "ga","IE" }, // Irish Ireland
174 { 0x083E, "ms","BN" }, // Malay Brunei Darussalam
175 { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan
176 { 0x0845, "bn","BD" }, // Bengali Bangladesh
177 { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China
178 { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada
179 { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria
180 { 0x086B, "es","EC" }, // Quechua Ecuador
181 { 0x0C01, "ar","EG" }, // Arabic Egypt
182 { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R.
183 { 0x0C07, "de","AT" }, // German Austria
184 { 0x0C09, "en","AU" }, // English Australia
185 { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain
186 { 0x0C0C, "fr","CA" }, // French Canada
187 { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia
188 { 0x0C3B, "se","FI" }, // Sami (Northern) Finland
189 { 0x0C6B, "qu","PE" }, // Quechua Peru
190 { 0x1001, "ar","LY" }, // Arabic Libya
191 { 0x1004, "zh","SG" }, // Chinese Singapore
192 { 0x1007, "de","LU" }, // German Luxembourg
193 { 0x1009, "en","CA" }, // English Canada
194 { 0x100A, "es","GT" }, // Spanish Guatemala
195 { 0x100C, "fr","CH" }, // French Switzerland
196 { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina
197 { 0x103B, "smj","NO" }, // Sami (Lule) Norway
198 { 0x1401, "ar","DZ" }, // Arabic Algeria
199 { 0x1404, "zh","MO" }, // Chinese Macao S.A.R.
200 { 0x1407, "de","LI" }, // German Liechtenstein
201 { 0x1409, "en","NZ" }, // English New Zealand
202 { 0x140A, "es","CR" }, // Spanish Costa Rica
203 { 0x140C, "fr","LU" }, // French Luxembourg
204 { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina
205 { 0x143B, "smj","SE" }, // Sami (Lule) Sweden
206 { 0x1801, "ar","MA" }, // Arabic Morocco
207 { 0x1809, "en","IE" }, // English Ireland
208 { 0x180A, "es","PA" }, // Spanish Panama
209 { 0x180C, "fr","MC" }, // French Principality of Monoco
210 { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina
211 { 0x183B, "sma","NO" }, // Sami (Southern) Norway
212 { 0x1C01, "ar","TN" }, // Arabic Tunisia
213 { 0x1C09, "en","ZA" }, // English South Africa
214 { 0x1C0A, "es","DO" }, // Spanish Dominican Republic
215 { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina
216 { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden
217 { 0x2001, "ar","OM" }, // Arabic Oman
218 { 0x2009, "en","JM" }, // English Jamaica
219 { 0x200A, "es","VE" }, // Spanish Venezuela
220 { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina
221 { 0x203B, "sms","FI" }, // Sami (Skolt) Finland
222 { 0x2401, "ar","YE" }, // Arabic Yemen
223 { 0x2409, "en","BS" }, // English Caribbean
224 { 0x240A, "es","CO" }, // Spanish Colombia
225 { 0x243B, "smn","FI" }, // Sami (Inari) Finland
226 { 0x2801, "ar","SY" }, // Arabic Syria
227 { 0x2809, "en","BZ" }, // English Belize
228 { 0x280A, "es","PE" }, // Spanish Peru
229 { 0x2C01, "ar","JO" }, // Arabic Jordan
230 { 0x2C09, "en","TT" }, // English Trinidad and Tobago
231 { 0x2C0A, "es","AR" }, // Spanish Argentina
232 { 0x3001, "ar","LB" }, // Arabic Lebanon
233 { 0x3009, "en","ZW" }, // English Zimbabwe
234 { 0x300A, "es","EC" }, // Spanish Ecuador
235 { 0x3401, "ar","KW" }, // Arabic Kuwait
236 { 0x3409, "en","PH" }, // English Republic of the Philippines
237 { 0x340A, "es","CL" }, // Spanish Chile
238 { 0x3801, "ar","AE" }, // Arabic U.A.E.
239 { 0x380A, "es","UY" }, // Spanish Uruguay
240 { 0x3C01, "ar","BH" }, // Arabic Bahrain
241 { 0x3C0A, "es","PY" }, // Spanish Paraguay
242 { 0x4001, "ar","QA" }, // Arabic Qatar
243 { 0x4009, "en","IN" }, // English India
244 { 0x400A, "es","BO" }, // Spanish Bolivia
245 { 0x4409, "en","MY" }, // English Malaysia
246 { 0x440A, "es","SV" }, // Spanish El Salvador
247 { 0x4809, "en","SG" }, // English Singapore
248 { 0x480A, "es","HN" }, // Spanish Honduras
249 { 0x4C0A, "es","NI" }, // Spanish Nicaragua
250 { 0x500A, "es","PR" }, // Spanish Puerto Rico
251 { 0x540A, "es","US" } // Spanish United States
252 };
254 class Locale2Lang
255 {
256 Locale2Lang(const Locale2Lang &);
257 Locale2Lang & operator = (const Locale2Lang &);
259 public:
260 Locale2Lang() : mSeedPosition(128)
261 {
262 memset((void*)mLangLookup, 0, sizeof(mLangLookup));
263 // create a tri lookup on first 2 letters of language code
264 static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
265 for (int i = 0; i < maxIndex; i++)
266 {
267 size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a';
268 size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a';
269 if (mLangLookup[a][b])
270 {
271 const IsoLangEntry ** old = mLangLookup[a][b];
272 int len = 1;
273 while (old[len]) len++;
274 len += 2;
275 mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len);
276 if (!mLangLookup[a][b])
277 {
278 mLangLookup[a][b] = old;
279 continue;
280 }
281 mLangLookup[a][b][--len] = NULL;
282 mLangLookup[a][b][--len] = &LANG_ENTRIES[i];
283 while (--len >= 0)
284 {
285 assert(len >= 0);
286 mLangLookup[a][b][len] = old[len];
287 }
288 free(old);
289 }
290 else
291 {
292 mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2);
293 if (!mLangLookup[a][b]) continue;
294 mLangLookup[a][b][1] = NULL;
295 mLangLookup[a][b][0] = &LANG_ENTRIES[i];
296 }
297 }
298 while (2 * mSeedPosition < maxIndex)
299 mSeedPosition *= 2;
300 };
301 ~Locale2Lang()
302 {
303 for (int i = 0; i != 26; ++i)
304 for (int j = 0; j != 26; ++j)
305 free(mLangLookup[i][j]);
306 }
307 unsigned short getMsId(const char * locale) const
308 {
309 size_t length = strlen(locale);
310 size_t langLength = length;
311 const char * language = locale;
312 const char * script = NULL;
313 const char * region = NULL;
314 size_t regionLength = 0;
315 const char * dash = strchr(locale, '-');
316 if (dash && (dash != locale))
317 {
318 langLength = (dash - locale);
319 size_t nextPartLength = length - langLength - 1;
320 if (nextPartLength >= 2)
321 {
322 script = ++dash;
323 dash = strchr(dash, '-');
324 if (dash)
325 {
326 nextPartLength = (dash - script);
327 region = ++dash;
328 }
329 if (nextPartLength == 2 &&
330 (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) &&
331 (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B))
332 {
333 region = script;
334 regionLength = nextPartLength;
335 script = NULL;
336 }
337 else if (nextPartLength == 4)
338 {
339 if (dash)
340 {
341 dash = strchr(dash, '-');
342 if (dash)
343 {
344 nextPartLength = (dash - region);
345 }
346 else
347 {
348 nextPartLength = langLength - (region - locale);
349 }
350 regionLength = nextPartLength;
351 }
352 }
353 }
354 }
355 size_t a = 'e' - 'a';
356 size_t b = 'n' - 'a';
357 unsigned short langId = 0;
358 int i = 0;
359 switch (langLength)
360 {
361 case 2:
362 {
363 a = language[0] - 'a';
364 b = language[1] - 'a';
365 if ((a < 26) && (b < 26) && mLangLookup[a][b])
366 {
367 while (mLangLookup[a][b][i])
368 {
369 if (mLangLookup[a][b][i]->maLangStr[2] != '\0')
370 {
371 ++i;
372 continue;
373 }
374 if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
375 {
376 langId = mLangLookup[a][b][i]->mnLang;
377 break;
378 }
379 else if (langId == 0)
380 {
381 // possible fallback code
382 langId = mLangLookup[a][b][i]->mnLang;
383 }
384 ++i;
385 }
386 }
387 }
388 break;
389 case 3:
390 {
391 a = language[0] - 'a';
392 b = language[1] - 'a';
393 if (mLangLookup[a][b])
394 {
395 while (mLangLookup[a][b][i])
396 {
397 if (mLangLookup[a][b][i]->maLangStr[2] != language[2])
398 {
399 ++i;
400 continue;
401 }
402 if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
403 {
404 langId = mLangLookup[a][b][i]->mnLang;
405 break;
406 }
407 else if (langId == 0)
408 {
409 // possible fallback code
410 langId = mLangLookup[a][b][i]->mnLang;
411 }
412 ++i;
413 }
414 }
415 }
416 break;
417 default:
418 break;
419 }
420 if (langId == 0) langId = 0x409;
421 return langId;
422 }
423 const IsoLangEntry * findEntryById(unsigned short langId) const
424 {
425 static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
426 int window = mSeedPosition;
427 int guess = mSeedPosition - 1;
428 while (LANG_ENTRIES[guess].mnLang != langId)
429 {
430 window /= 2;
431 if (window == 0) return NULL;
432 guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window;
433 while (guess >= maxIndex)
434 {
435 window /= 2;
436 guess -= window;
437 assert(window);
438 }
439 }
440 return &LANG_ENTRIES[guess];
441 }
443 CLASS_NEW_DELETE;
445 private:
446 const IsoLangEntry ** mLangLookup[26][26];
447 int mSeedPosition;
448 };
450 } // namespace graphite2