michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1997-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * michael@0: * File ULOC.CPP michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 04/01/97 aliu Creation. michael@0: * 08/21/98 stephen JDK 1.2 sync michael@0: * 12/08/98 rtg New Locale implementation and C API michael@0: * 03/15/99 damiba overhaul. michael@0: * 04/06/99 stephen changed setDefault() to realloc and copy michael@0: * 06/14/99 stephen Changed calls to ures_open for new params michael@0: * 07/21/99 stephen Modified setDefault() to propagate to C++ michael@0: * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, michael@0: * brought canonicalization code into line with spec michael@0: *****************************************************************************/ michael@0: michael@0: /* michael@0: POSIX's locale format, from putil.c: [no spaces] michael@0: michael@0: ll [ _CC ] [ . MM ] [ @ VV] michael@0: michael@0: l = lang, C = ctry, M = charmap, V = variant michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/uloc.h" michael@0: michael@0: #include "putilimp.h" michael@0: #include "ustr_imp.h" michael@0: #include "ulocimp.h" michael@0: #include "umutex.h" michael@0: #include "cstring.h" michael@0: #include "cmemory.h" michael@0: #include "ucln_cmn.h" michael@0: #include "locmap.h" michael@0: #include "uarrsort.h" michael@0: #include "uenumimp.h" michael@0: #include "uassert.h" michael@0: michael@0: #include /* for sprintf */ michael@0: michael@0: /* ### Declarations **************************************************/ michael@0: michael@0: /* Locale stuff from locid.cpp */ michael@0: U_CFUNC void locale_set_default(const char *id); michael@0: U_CFUNC const char *locale_get_default(void); michael@0: U_CFUNC int32_t michael@0: locale_getKeywords(const char *localeID, michael@0: char prev, michael@0: char *keywords, int32_t keywordCapacity, michael@0: char *values, int32_t valuesCapacity, int32_t *valLen, michael@0: UBool valuesToo, michael@0: UErrorCode *status); michael@0: michael@0: /* ### Data tables **************************************************/ michael@0: michael@0: /** michael@0: * Table of language codes, both 2- and 3-letter, with preference michael@0: * given to 2-letter codes where possible. Includes 3-letter codes michael@0: * that lack a 2-letter equivalent. michael@0: * michael@0: * This list must be in sorted order. This list is returned directly michael@0: * to the user by some API. michael@0: * michael@0: * This list must be kept in sync with LANGUAGES_3, with corresponding michael@0: * entries matched. michael@0: * michael@0: * This table should be terminated with a NULL entry, followed by a michael@0: * second list, and another NULL entry. The first list is visible to michael@0: * user code when this array is returned by API. The second list michael@0: * contains codes we support, but do not expose through user API. michael@0: * michael@0: * Notes michael@0: * michael@0: * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to michael@0: * include the revisions up to 2001/7/27 *CWB* michael@0: * michael@0: * The 3 character codes are the terminology codes like RFC 3066. This michael@0: * is compatible with prior ICU codes michael@0: * michael@0: * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the michael@0: * table but now at the end of the table because 3 character codes are michael@0: * duplicates. This avoids bad searches going from 3 to 2 character michael@0: * codes. michael@0: * michael@0: * The range qaa-qtz is reserved for local use michael@0: */ michael@0: /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ michael@0: /* ISO639 table version is 20130531 */ michael@0: static const char * const LANGUAGES[] = { michael@0: "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", michael@0: "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg", michael@0: "alt", "am", "an", "ang", "anp", "apa", "ar", "arc", michael@0: "arn", "arp", "art", "arw", "as", "asa", "ast", "ath", michael@0: "aus", "av", "awa", "ay", "az", michael@0: "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax", michael@0: "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg", michael@0: "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm", michael@0: "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss", michael@0: "btk", "bua", "bug", "bum", "byn", "byv", michael@0: "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce", michael@0: "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm", michael@0: "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co", michael@0: "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", michael@0: "csb", "cu", "cus", "cv", "cy", michael@0: "da", "dak", "dar", "dav", "day", "de", "del", "den", michael@0: "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", michael@0: "dv", "dyo", "dyu", "dz", "dzg", michael@0: "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en", michael@0: "enm", "eo", "es", "et", "eu", "ewo", michael@0: "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj", michael@0: "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur", michael@0: "fy", michael@0: "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil", michael@0: "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb", michael@0: "grc", "gsw", "gu", "guz", "gv", "gwi", michael@0: "ha", "hai", "haw", "he", "hi", "hil", "him", "hit", michael@0: "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", michael@0: "hz", michael@0: "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo", michael@0: "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro", michael@0: "is", "it", "iu", michael@0: "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv", michael@0: "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw", michael@0: "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha", michael@0: "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl", michael@0: "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe", michael@0: "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf", michael@0: "ksh", "ku", "kum", "kut", "kv", "kw", "ky", michael@0: "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg", michael@0: "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu", michael@0: "lua", "lui", "lun", "luo", "lus", "luy", "lv", michael@0: "mad", "maf", "mag", "mai", "mak", "man", "map", "mas", michael@0: "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga", michael@0: "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", michael@0: "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh", michael@0: "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus", michael@0: "mwl", "mwr", "my", "mye", "myn", "myv", michael@0: "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds", michael@0: "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg", michael@0: "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso", michael@0: "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", michael@0: "nzi", michael@0: "oc", "oj", "om", "or", "os", "osa", "ota", "oto", michael@0: "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo", michael@0: "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps", michael@0: "pt", michael@0: "qu", michael@0: "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof", michael@0: "rom", "ru", "rup", "rw", "rwk", michael@0: "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas", michael@0: "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se", michael@0: "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn", michael@0: "shi", "shn", "shu", "si", "sid", "sio", "sit", michael@0: "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", michael@0: "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", michael@0: "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk", michael@0: "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr", michael@0: "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg", michael@0: "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh", michael@0: "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", michael@0: "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", michael@0: "twq", "ty", "tyv", "tzm", michael@0: "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz", michael@0: "vai", "ve", "vi", "vo", "vot", "vun", michael@0: "wa", "wae", "wak", "wal", "war", "was", "wen", "wo", michael@0: "xal", "xh", "xog", michael@0: "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue", michael@0: "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu", michael@0: "zun", "zxx", "zza", michael@0: NULL, michael@0: "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ michael@0: NULL michael@0: }; michael@0: michael@0: static const char* const DEPRECATED_LANGUAGES[]={ michael@0: "in", "iw", "ji", "jw", NULL, NULL michael@0: }; michael@0: static const char* const REPLACEMENT_LANGUAGES[]={ michael@0: "id", "he", "yi", "jv", NULL, NULL michael@0: }; michael@0: michael@0: /** michael@0: * Table of 3-letter language codes. michael@0: * michael@0: * This is a lookup table used to convert 3-letter language codes to michael@0: * their 2-letter equivalent, where possible. It must be kept in sync michael@0: * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the michael@0: * same language as LANGUAGES_3[i]. The commented-out lines are michael@0: * copied from LANGUAGES to make eyeballing this baby easier. michael@0: * michael@0: * Where a 3-letter language code has no 2-letter equivalent, the michael@0: * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. michael@0: * michael@0: * This table should be terminated with a NULL entry, followed by a michael@0: * second list, and another NULL entry. The two lists correspond to michael@0: * the two lists in LANGUAGES. michael@0: */ michael@0: /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ michael@0: /* ISO639 table version is 20130531 */ michael@0: static const char * const LANGUAGES_3[] = { michael@0: "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", michael@0: "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg", michael@0: "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc", michael@0: "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath", michael@0: "aus", "ava", "awa", "aym", "aze", michael@0: "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax", michael@0: "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul", michael@0: "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam", michael@0: "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss", michael@0: "btk", "bua", "bug", "bum", "byn", "byv", michael@0: "cat", "cad", "cai", "car", "cau", "cay", "cch", "che", michael@0: "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm", michael@0: "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos", michael@0: "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", michael@0: "csb", "chu", "cus", "chv", "cym", michael@0: "dan", "dak", "dar", "dav", "day", "deu", "del", "den", michael@0: "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", michael@0: "div", "dyo", "dyu", "dzo", "dzg", michael@0: "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng", michael@0: "enm", "epo", "spa", "est", "eus", "ewo", michael@0: "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij", michael@0: "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur", michael@0: "fry", michael@0: "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil", michael@0: "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb", michael@0: "grc", "gsw", "guj", "guz", "glv", "gwi", michael@0: "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit", michael@0: "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", michael@0: "her", michael@0: "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo", michael@0: "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro", michael@0: "isl", "ita", "iku", michael@0: "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav", michael@0: "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw", michael@0: "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha", michael@0: "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal", michael@0: "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe", michael@0: "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf", michael@0: "ksh", "kur", "kum", "kut", "kom", "cor", "kir", michael@0: "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug", michael@0: "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub", michael@0: "lua", "lui", "lun", "luo", "lus", "luy", "lav", michael@0: "mad", "maf", "mag", "mai", "mak", "man", "map", "mas", michael@0: "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga", michael@0: "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", michael@0: "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh", michael@0: "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus", michael@0: "mwl", "mwr", "mya", "mye", "myn", "myv", michael@0: "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds", michael@0: "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg", michael@0: "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso", michael@0: "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", michael@0: "nzi", michael@0: "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto", michael@0: "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo", michael@0: "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus", michael@0: "por", michael@0: "que", michael@0: "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof", michael@0: "rom", "rus", "rup", "kin", "rwk", michael@0: "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas", michael@0: "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme", michael@0: "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn", michael@0: "shi", "shn", "shu", "sin", "sid", "sio", "sit", michael@0: "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn", michael@0: "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp", michael@0: "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk", michael@0: "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr", michael@0: "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk", michael@0: "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh", michael@0: "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv", michael@0: "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", michael@0: "twq", "tah", "tyv", "tzm", michael@0: "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", michael@0: "vai", "ven", "vie", "vol", "vot", "vun", michael@0: "wln", "wae", "wak", "wal", "war", "was", "wen", "wol", michael@0: "xal", "xho", "xog", michael@0: "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue", michael@0: "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul", michael@0: "zun", "zxx", "zza", michael@0: NULL, michael@0: /* "in", "iw", "ji", "jw", "sh", */ michael@0: "ind", "heb", "yid", "jaw", "srp", michael@0: NULL michael@0: }; michael@0: michael@0: /** michael@0: * Table of 2-letter country codes. michael@0: * michael@0: * This list must be in sorted order. This list is returned directly michael@0: * to the user by some API. michael@0: * michael@0: * This list must be kept in sync with COUNTRIES_3, with corresponding michael@0: * entries matched. michael@0: * michael@0: * This table should be terminated with a NULL entry, followed by a michael@0: * second list, and another NULL entry. The first list is visible to michael@0: * user code when this array is returned by API. The second list michael@0: * contains codes we support, but do not expose through user API. michael@0: * michael@0: * Notes: michael@0: * michael@0: * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per michael@0: * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added michael@0: * new codes keeping the old ones for compatibility updated to include michael@0: * 1999/12/03 revisions *CWB* michael@0: * michael@0: * RO(ROM) is now RO(ROU) according to michael@0: * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html michael@0: */ michael@0: static const char * const COUNTRIES[] = { michael@0: "AD", "AE", "AF", "AG", "AI", "AL", "AM", michael@0: "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", michael@0: "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", michael@0: "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", michael@0: "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", michael@0: "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", michael@0: "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", michael@0: "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", michael@0: "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", michael@0: "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", michael@0: "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", michael@0: "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", michael@0: "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", michael@0: "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", michael@0: "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", michael@0: "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", michael@0: "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", michael@0: "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", michael@0: "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", michael@0: "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", michael@0: "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", michael@0: "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", michael@0: "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", michael@0: "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", michael@0: "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", michael@0: "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", michael@0: "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", michael@0: "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", michael@0: "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", michael@0: "WS", "YE", "YT", "ZA", "ZM", "ZW", michael@0: NULL, michael@0: "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ michael@0: NULL michael@0: }; michael@0: michael@0: static const char* const DEPRECATED_COUNTRIES[] = { michael@0: "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */ michael@0: }; michael@0: static const char* const REPLACEMENT_COUNTRIES[] = { michael@0: /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */ michael@0: "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */ michael@0: }; michael@0: michael@0: /** michael@0: * Table of 3-letter country codes. michael@0: * michael@0: * This is a lookup table used to convert 3-letter country codes to michael@0: * their 2-letter equivalent. It must be kept in sync with COUNTRIES. michael@0: * For all valid i, COUNTRIES[i] must refer to the same country as michael@0: * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES michael@0: * to make eyeballing this baby easier. michael@0: * michael@0: * This table should be terminated with a NULL entry, followed by a michael@0: * second list, and another NULL entry. The two lists correspond to michael@0: * the two lists in COUNTRIES. michael@0: */ michael@0: static const char * const COUNTRIES_3[] = { michael@0: /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ michael@0: "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", michael@0: /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ michael@0: "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", michael@0: /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ michael@0: "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", michael@0: /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ michael@0: "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", michael@0: /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ michael@0: "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", michael@0: /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ michael@0: "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", michael@0: /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ michael@0: "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", michael@0: /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ michael@0: "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", michael@0: /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ michael@0: "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", michael@0: /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ michael@0: "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", michael@0: /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ michael@0: "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", michael@0: /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ michael@0: "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", michael@0: /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ michael@0: "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", michael@0: /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ michael@0: "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", michael@0: /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ michael@0: "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", michael@0: /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ michael@0: "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", michael@0: /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ michael@0: "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", michael@0: /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ michael@0: "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", michael@0: /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ michael@0: "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", michael@0: /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ michael@0: "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", michael@0: /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ michael@0: "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", michael@0: /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ michael@0: "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", michael@0: /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ michael@0: "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", michael@0: /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ michael@0: "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", michael@0: /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ michael@0: "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", michael@0: /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ michael@0: "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", michael@0: /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ michael@0: "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", michael@0: /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ michael@0: "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", michael@0: /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ michael@0: "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", michael@0: /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ michael@0: "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", michael@0: NULL, michael@0: /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ michael@0: "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", michael@0: NULL michael@0: }; michael@0: michael@0: typedef struct CanonicalizationMap { michael@0: const char *id; /* input ID */ michael@0: const char *canonicalID; /* canonicalized output ID */ michael@0: const char *keyword; /* keyword, or NULL if none */ michael@0: const char *value; /* keyword value, or NULL if kw==NULL */ michael@0: } CanonicalizationMap; michael@0: michael@0: /** michael@0: * A map to canonicalize locale IDs. This handles a variety of michael@0: * different semantic kinds of transformations. michael@0: */ michael@0: static const CanonicalizationMap CANONICALIZE_MAP[] = { michael@0: { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ michael@0: { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ michael@0: { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ michael@0: { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ michael@0: { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ michael@0: { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ michael@0: { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, michael@0: { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ michael@0: { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, michael@0: { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, michael@0: { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, michael@0: { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, michael@0: { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, michael@0: { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, michael@0: { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ michael@0: { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, michael@0: { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, michael@0: { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, michael@0: { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, michael@0: { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, michael@0: { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, michael@0: { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, michael@0: { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, michael@0: { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ michael@0: { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, michael@0: { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ michael@0: { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ michael@0: { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, michael@0: { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, michael@0: { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, michael@0: { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ michael@0: { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ michael@0: { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ michael@0: { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ michael@0: { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ michael@0: { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ michael@0: { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ michael@0: { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ michael@0: { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ michael@0: { "zh_GAN", "gan", NULL, NULL }, /* registered name */ michael@0: { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ michael@0: { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ michael@0: { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ michael@0: { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ michael@0: { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ michael@0: { "zh_YUE", "yue", NULL, NULL }, /* registered name */ michael@0: }; michael@0: michael@0: typedef struct VariantMap { michael@0: const char *variant; /* input ID */ michael@0: const char *keyword; /* keyword, or NULL if none */ michael@0: const char *value; /* keyword value, or NULL if kw==NULL */ michael@0: } VariantMap; michael@0: michael@0: static const VariantMap VARIANT_MAP[] = { michael@0: { "EURO", "currency", "EUR" }, michael@0: { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ michael@0: { "STROKE", "collation", "stroke" } /* Solaris variant */ michael@0: }; michael@0: michael@0: /* ### BCP47 Conversion *******************************************/ michael@0: /* Test if the locale id has BCP47 u extension and does not have '@' */ michael@0: #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) michael@0: /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ michael@0: #define _ConvertBCP47(finalID, id, buffer, length,err) \ michael@0: if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ michael@0: finalID=id; \ michael@0: } else { \ michael@0: finalID=buffer; \ michael@0: } michael@0: /* Gets the size of the shortest subtag in the given localeID. */ michael@0: static int32_t getShortestSubtagLength(const char *localeID) { michael@0: int32_t localeIDLength = uprv_strlen(localeID); michael@0: int32_t length = localeIDLength; michael@0: int32_t tmpLength = 0; michael@0: int32_t i; michael@0: UBool reset = TRUE; michael@0: michael@0: for (i = 0; i < localeIDLength; i++) { michael@0: if (localeID[i] != '_' && localeID[i] != '-') { michael@0: if (reset) { michael@0: tmpLength = 0; michael@0: reset = FALSE; michael@0: } michael@0: tmpLength++; michael@0: } else { michael@0: if (tmpLength != 0 && tmpLength < length) { michael@0: length = tmpLength; michael@0: } michael@0: reset = TRUE; michael@0: } michael@0: } michael@0: michael@0: return length; michael@0: } michael@0: michael@0: /* ### Keywords **************************************************/ michael@0: michael@0: #define ULOC_KEYWORD_BUFFER_LEN 25 michael@0: #define ULOC_MAX_NO_KEYWORDS 25 michael@0: michael@0: U_CAPI const char * U_EXPORT2 michael@0: locale_getKeywordsStart(const char *localeID) { michael@0: const char *result = NULL; michael@0: if((result = uprv_strchr(localeID, '@')) != NULL) { michael@0: return result; michael@0: } michael@0: #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) michael@0: else { michael@0: /* We do this because the @ sign is variant, and the @ sign used on one michael@0: EBCDIC machine won't be compiled the same way on other EBCDIC based michael@0: machines. */ michael@0: static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; michael@0: const uint8_t *charToFind = ebcdicSigns; michael@0: while(*charToFind) { michael@0: if((result = uprv_strchr(localeID, *charToFind)) != NULL) { michael@0: return result; michael@0: } michael@0: charToFind++; michael@0: } michael@0: } michael@0: #endif michael@0: return NULL; michael@0: } michael@0: michael@0: /** michael@0: * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] michael@0: * @param keywordName incoming name to be canonicalized michael@0: * @param status return status (keyword too long) michael@0: * @return length of the keyword name michael@0: */ michael@0: static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) michael@0: { michael@0: int32_t i; michael@0: int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); michael@0: michael@0: if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { michael@0: /* keyword name too long for internal buffer */ michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* normalize the keyword name */ michael@0: for(i = 0; i < keywordNameLen; i++) { michael@0: buf[i] = uprv_tolower(keywordName[i]); michael@0: } michael@0: buf[i] = 0; michael@0: michael@0: return keywordNameLen; michael@0: } michael@0: michael@0: typedef struct { michael@0: char keyword[ULOC_KEYWORD_BUFFER_LEN]; michael@0: int32_t keywordLen; michael@0: const char *valueStart; michael@0: int32_t valueLen; michael@0: } KeywordStruct; michael@0: michael@0: static int32_t U_CALLCONV michael@0: compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { michael@0: const char* leftString = ((const KeywordStruct *)left)->keyword; michael@0: const char* rightString = ((const KeywordStruct *)right)->keyword; michael@0: return uprv_strcmp(leftString, rightString); michael@0: } michael@0: michael@0: /** michael@0: * Both addKeyword and addValue must already be in canonical form. michael@0: * Either both addKeyword and addValue are NULL, or neither is NULL. michael@0: * If they are not NULL they must be zero terminated. michael@0: * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. michael@0: */ michael@0: static int32_t michael@0: _getKeywords(const char *localeID, michael@0: char prev, michael@0: char *keywords, int32_t keywordCapacity, michael@0: char *values, int32_t valuesCapacity, int32_t *valLen, michael@0: UBool valuesToo, michael@0: const char* addKeyword, michael@0: const char* addValue, michael@0: UErrorCode *status) michael@0: { michael@0: KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; michael@0: michael@0: int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; michael@0: int32_t numKeywords = 0; michael@0: const char* pos = localeID; michael@0: const char* equalSign = NULL; michael@0: const char* semicolon = NULL; michael@0: int32_t i = 0, j, n; michael@0: int32_t keywordsLen = 0; michael@0: int32_t valuesLen = 0; michael@0: michael@0: if(prev == '@') { /* start of keyword definition */ michael@0: /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ michael@0: do { michael@0: UBool duplicate = FALSE; michael@0: /* skip leading spaces */ michael@0: while(*pos == ' ') { michael@0: pos++; michael@0: } michael@0: if (!*pos) { /* handle trailing "; " */ michael@0: break; michael@0: } michael@0: if(numKeywords == maxKeywords) { michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return 0; michael@0: } michael@0: equalSign = uprv_strchr(pos, '='); michael@0: semicolon = uprv_strchr(pos, ';'); michael@0: /* lack of '=' [foo@currency] is illegal */ michael@0: /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ michael@0: if(!equalSign || (semicolon && semicolon= ULOC_KEYWORD_BUFFER_LEN) { michael@0: /* keyword name too long for internal buffer */ michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return 0; michael@0: } michael@0: for(i = 0, n = 0; i < equalSign - pos; ++i) { michael@0: if (pos[i] != ' ') { michael@0: keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); michael@0: } michael@0: } michael@0: michael@0: /* zero-length keyword is an error. */ michael@0: if (n == 0) { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: keywordList[numKeywords].keyword[n] = 0; michael@0: keywordList[numKeywords].keywordLen = n; michael@0: /* now grab the value part. First we skip the '=' */ michael@0: equalSign++; michael@0: /* then we leading spaces */ michael@0: while(*equalSign == ' ') { michael@0: equalSign++; michael@0: } michael@0: michael@0: /* Premature end or zero-length value */ michael@0: if (!equalSign || equalSign == semicolon) { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: keywordList[numKeywords].valueStart = equalSign; michael@0: michael@0: pos = semicolon; michael@0: i = 0; michael@0: if(pos) { michael@0: while(*(pos - i - 1) == ' ') { michael@0: i++; michael@0: } michael@0: keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); michael@0: pos++; michael@0: } else { michael@0: i = (int32_t)uprv_strlen(equalSign); michael@0: while(i && equalSign[i-1] == ' ') { michael@0: i--; michael@0: } michael@0: keywordList[numKeywords].valueLen = i; michael@0: } michael@0: /* If this is a duplicate keyword, then ignore it */ michael@0: for (j=0; j=0); michael@0: } michael@0: localeKeywordNameBuffer[i] = 0; michael@0: michael@0: startSearchHere = uprv_strchr(nextSeparator, ';'); michael@0: michael@0: if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { michael@0: nextSeparator++; michael@0: while(*nextSeparator == ' ') { michael@0: nextSeparator++; michael@0: } michael@0: /* we actually found the keyword. Copy the value */ michael@0: if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { michael@0: while(*(startSearchHere-1) == ' ') { michael@0: startSearchHere--; michael@0: } michael@0: uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); michael@0: result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); michael@0: } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ michael@0: i = (int32_t)uprv_strlen(nextSeparator); michael@0: while(nextSeparator[i - 1] == ' ') { michael@0: i--; michael@0: } michael@0: uprv_strncpy(buffer, nextSeparator, i); michael@0: result = u_terminateChars(buffer, bufferCapacity, i, status); michael@0: } else { michael@0: /* give a bigger buffer, please */ michael@0: *status = U_BUFFER_OVERFLOW_ERROR; michael@0: if(startSearchHere) { michael@0: result = (int32_t)(startSearchHere - nextSeparator); michael@0: } else { michael@0: result = (int32_t)uprv_strlen(nextSeparator); michael@0: } michael@0: } michael@0: return result; michael@0: } michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uloc_setKeywordValue(const char* keywordName, michael@0: const char* keywordValue, michael@0: char* buffer, int32_t bufferCapacity, michael@0: UErrorCode* status) michael@0: { michael@0: /* TODO: sorting. removal. */ michael@0: int32_t keywordNameLen; michael@0: int32_t keywordValueLen; michael@0: int32_t bufLen; michael@0: int32_t needLen = 0; michael@0: int32_t foundValueLen; michael@0: int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ michael@0: char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; michael@0: char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; michael@0: int32_t i = 0; michael@0: int32_t rc; michael@0: char* nextSeparator = NULL; michael@0: char* nextEqualsign = NULL; michael@0: char* startSearchHere = NULL; michael@0: char* keywordStart = NULL; michael@0: char *insertHere = NULL; michael@0: if(U_FAILURE(*status)) { michael@0: return -1; michael@0: } michael@0: if(bufferCapacity>1) { michael@0: bufLen = (int32_t)uprv_strlen(buffer); michael@0: } else { michael@0: *status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: if(bufferCapacity= bufferCapacity) { michael@0: *status = U_BUFFER_OVERFLOW_ERROR; michael@0: return needLen; /* no change */ michael@0: } michael@0: *startSearchHere = '@'; michael@0: startSearchHere++; michael@0: uprv_strcpy(startSearchHere, keywordNameBuffer); michael@0: startSearchHere += keywordNameLen; michael@0: *startSearchHere = '='; michael@0: startSearchHere++; michael@0: uprv_strcpy(startSearchHere, keywordValue); michael@0: startSearchHere+=keywordValueLen; michael@0: return needLen; michael@0: } /* end shortcut - no @ */ michael@0: michael@0: keywordStart = startSearchHere; michael@0: /* search for keyword */ michael@0: while(keywordStart) { michael@0: keywordStart++; michael@0: /* skip leading spaces (allowed?) */ michael@0: while(*keywordStart == ' ') { michael@0: keywordStart++; michael@0: } michael@0: nextEqualsign = uprv_strchr(keywordStart, '='); michael@0: /* need to normalize both keyword and keyword name */ michael@0: if(!nextEqualsign) { michael@0: break; michael@0: } michael@0: if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { michael@0: /* keyword name too long for internal buffer */ michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return 0; michael@0: } michael@0: for(i = 0; i < nextEqualsign - keywordStart; i++) { michael@0: localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); michael@0: } michael@0: /* trim trailing spaces */ michael@0: while(keywordStart[i-1] == ' ') { michael@0: i--; michael@0: } michael@0: U_ASSERT(i>=0 && i keywordValueLen) { michael@0: int32_t delta = foundValueLen - keywordValueLen; michael@0: if(nextSeparator) { /* RH side */ michael@0: uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); michael@0: } michael@0: uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); michael@0: bufLen -= delta; michael@0: buffer[bufLen]=0; michael@0: return bufLen; michael@0: } else { /* FVL < KVL */ michael@0: int32_t delta = keywordValueLen - foundValueLen; michael@0: if((bufLen+delta) >= bufferCapacity) { michael@0: *status = U_BUFFER_OVERFLOW_ERROR; michael@0: return bufLen+delta; michael@0: } michael@0: if(nextSeparator) { /* RH side */ michael@0: uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); michael@0: } michael@0: uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); michael@0: bufLen += delta; michael@0: buffer[bufLen]=0; michael@0: return bufLen; michael@0: } michael@0: } else { /* removing a keyword */ michael@0: if(keywordAtEnd) { michael@0: /* zero out the ';' or '@' just before startSearchhere */ michael@0: keywordStart[-1] = 0; michael@0: return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ michael@0: } else { michael@0: uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); michael@0: keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; michael@0: return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); michael@0: } michael@0: } michael@0: } else if(rc<0){ /* end match keyword */ michael@0: /* could insert at this location. */ michael@0: insertHere = keywordStart; michael@0: } michael@0: keywordStart = nextSeparator; michael@0: } /* end loop searching */ michael@0: michael@0: if(!keywordValue) { michael@0: return bufLen; /* removal of non-extant keyword - no change */ michael@0: } michael@0: michael@0: /* we know there is at least one keyword. */ michael@0: needLen = bufLen+1+keywordNameLen+1+keywordValueLen; michael@0: if(needLen >= bufferCapacity) { michael@0: *status = U_BUFFER_OVERFLOW_ERROR; michael@0: return needLen; /* no change */ michael@0: } michael@0: michael@0: if(insertHere) { michael@0: uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); michael@0: keywordStart = insertHere; michael@0: } else { michael@0: keywordStart = buffer+bufLen; michael@0: *keywordStart = ';'; michael@0: keywordStart++; michael@0: } michael@0: uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); michael@0: keywordStart += keywordNameLen; michael@0: *keywordStart = '='; michael@0: keywordStart++; michael@0: uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ michael@0: keywordStart+=keywordValueLen; michael@0: if(insertHere) { michael@0: *keywordStart = ';'; michael@0: keywordStart++; michael@0: } michael@0: buffer[needLen]=0; michael@0: return needLen; michael@0: } michael@0: michael@0: /* ### ID parsing implementation **************************************************/ michael@0: michael@0: #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) michael@0: michael@0: /*returns TRUE if one of the special prefixes is here (s=string) michael@0: 'x-' or 'i-' */ michael@0: #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) michael@0: michael@0: /* Dot terminates it because of POSIX form where dot precedes the codepage michael@0: * except for variant michael@0: */ michael@0: #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) michael@0: michael@0: static char* _strnchr(const char* str, int32_t len, char c) { michael@0: U_ASSERT(str != 0 && len >= 0); michael@0: while (len-- != 0) { michael@0: char d = *str; michael@0: if (d == c) { michael@0: return (char*) str; michael@0: } else if (d == 0) { michael@0: break; michael@0: } michael@0: ++str; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: /** michael@0: * Lookup 'key' in the array 'list'. The array 'list' should contain michael@0: * a NULL entry, followed by more entries, and a second NULL entry. michael@0: * michael@0: * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or michael@0: * COUNTRIES_3. michael@0: */ michael@0: static int16_t _findIndex(const char* const* list, const char* key) michael@0: { michael@0: const char* const* anchor = list; michael@0: int32_t pass = 0; michael@0: michael@0: /* Make two passes through two NULL-terminated arrays at 'list' */ michael@0: while (pass++ < 2) { michael@0: while (*list) { michael@0: if (uprv_strcmp(key, *list) == 0) { michael@0: return (int16_t)(list - anchor); michael@0: } michael@0: list++; michael@0: } michael@0: ++list; /* skip final NULL *CWB*/ michael@0: } michael@0: return -1; michael@0: } michael@0: michael@0: /* count the length of src while copying it to dest; return strlen(src) */ michael@0: static inline int32_t michael@0: _copyCount(char *dest, int32_t destCapacity, const char *src) { michael@0: const char *anchor; michael@0: char c; michael@0: michael@0: anchor=src; michael@0: for(;;) { michael@0: if((c=*src)==0) { michael@0: return (int32_t)(src-anchor); michael@0: } michael@0: if(destCapacity<=0) { michael@0: return (int32_t)((src-anchor)+uprv_strlen(src)); michael@0: } michael@0: ++src; michael@0: *dest++=c; michael@0: --destCapacity; michael@0: } michael@0: } michael@0: michael@0: U_CFUNC const char* michael@0: uloc_getCurrentCountryID(const char* oldID){ michael@0: int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); michael@0: if (offset >= 0) { michael@0: return REPLACEMENT_COUNTRIES[offset]; michael@0: } michael@0: return oldID; michael@0: } michael@0: U_CFUNC const char* michael@0: uloc_getCurrentLanguageID(const char* oldID){ michael@0: int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); michael@0: if (offset >= 0) { michael@0: return REPLACEMENT_LANGUAGES[offset]; michael@0: } michael@0: return oldID; michael@0: } michael@0: /* michael@0: * the internal functions _getLanguage(), _getCountry(), _getVariant() michael@0: * avoid duplicating code to handle the earlier locale ID pieces michael@0: * in the functions for the later ones by michael@0: * setting the *pEnd pointer to where they stopped parsing michael@0: * michael@0: * TODO try to use this in Locale michael@0: */ michael@0: U_CFUNC int32_t michael@0: ulocimp_getLanguage(const char *localeID, michael@0: char *language, int32_t languageCapacity, michael@0: const char **pEnd) { michael@0: int32_t i=0; michael@0: int32_t offset; michael@0: char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ michael@0: michael@0: /* if it starts with i- or x- then copy that prefix */ michael@0: if(_isIDPrefix(localeID)) { michael@0: if(i=0); michael@0: lang[i]=(char)uprv_tolower(*localeID); michael@0: } michael@0: i++; michael@0: localeID++; michael@0: } michael@0: michael@0: if(i==3) { michael@0: /* convert 3 character code to 2 character code if possible *CWB*/ michael@0: offset=_findIndex(LANGUAGES_3, lang); michael@0: if(offset>=0) { michael@0: i=_copyCount(language, languageCapacity, LANGUAGES[offset]); michael@0: } michael@0: } michael@0: michael@0: if(pEnd!=NULL) { michael@0: *pEnd=localeID; michael@0: } michael@0: return i; michael@0: } michael@0: michael@0: U_CFUNC int32_t michael@0: ulocimp_getScript(const char *localeID, michael@0: char *script, int32_t scriptCapacity, michael@0: const char **pEnd) michael@0: { michael@0: int32_t idLen = 0; michael@0: michael@0: if (pEnd != NULL) { michael@0: *pEnd = localeID; michael@0: } michael@0: michael@0: /* copy the second item as far as possible and count its length */ michael@0: while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) michael@0: && uprv_isASCIILetter(localeID[idLen])) { michael@0: idLen++; michael@0: } michael@0: michael@0: /* If it's exactly 4 characters long, then it's a script and not a country. */ michael@0: if (idLen == 4) { michael@0: int32_t i; michael@0: if (pEnd != NULL) { michael@0: *pEnd = localeID+idLen; michael@0: } michael@0: if(idLen > scriptCapacity) { michael@0: idLen = scriptCapacity; michael@0: } michael@0: if (idLen >= 1) { michael@0: script[0]=(char)uprv_toupper(*(localeID++)); michael@0: } michael@0: for (i = 1; i < idLen; i++) { michael@0: script[i]=(char)uprv_tolower(*(localeID++)); michael@0: } michael@0: } michael@0: else { michael@0: idLen = 0; michael@0: } michael@0: return idLen; michael@0: } michael@0: michael@0: U_CFUNC int32_t michael@0: ulocimp_getCountry(const char *localeID, michael@0: char *country, int32_t countryCapacity, michael@0: const char **pEnd) michael@0: { michael@0: int32_t idLen=0; michael@0: char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; michael@0: int32_t offset; michael@0: michael@0: /* copy the country as far as possible and count its length */ michael@0: while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { michael@0: if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ michael@0: cnty[idLen]=(char)uprv_toupper(localeID[idLen]); michael@0: } michael@0: idLen++; michael@0: } michael@0: michael@0: /* the country should be either length 2 or 3 */ michael@0: if (idLen == 2 || idLen == 3) { michael@0: UBool gotCountry = FALSE; michael@0: /* convert 3 character code to 2 character code if possible *CWB*/ michael@0: if(idLen==3) { michael@0: offset=_findIndex(COUNTRIES_3, cnty); michael@0: if(offset>=0) { michael@0: idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); michael@0: gotCountry = TRUE; michael@0: } michael@0: } michael@0: if (!gotCountry) { michael@0: int32_t i = 0; michael@0: for (i = 0; i < idLen; i++) { michael@0: if (i < countryCapacity) { michael@0: country[i]=(char)uprv_toupper(localeID[i]); michael@0: } michael@0: } michael@0: } michael@0: localeID+=idLen; michael@0: } else { michael@0: idLen = 0; michael@0: } michael@0: michael@0: if(pEnd!=NULL) { michael@0: *pEnd=localeID; michael@0: } michael@0: michael@0: return idLen; michael@0: } michael@0: michael@0: /** michael@0: * @param needSeparator if true, then add leading '_' if any variants michael@0: * are added to 'variant' michael@0: */ michael@0: static int32_t michael@0: _getVariantEx(const char *localeID, michael@0: char prev, michael@0: char *variant, int32_t variantCapacity, michael@0: UBool needSeparator) { michael@0: int32_t i=0; michael@0: michael@0: /* get one or more variant tags and separate them with '_' */ michael@0: if(_isIDSeparator(prev)) { michael@0: /* get a variant string after a '-' or '_' */ michael@0: while(!_isTerminator(*localeID)) { michael@0: if (needSeparator) { michael@0: if (i "FOO_BAR". michael@0: * @param variants the source string of one or more variants, michael@0: * separated by '_'. This will be MODIFIED IN PLACE. Not zero michael@0: * terminated; if it is, trailing zero will NOT be maintained. michael@0: * @param variantsLen length of variants michael@0: * @param toDelete variant to delete, without separators, e.g. "EURO" michael@0: * or "PREEURO"; not zero terminated michael@0: * @param toDeleteLen length of toDelete michael@0: * @return number of characters deleted from variants michael@0: */ michael@0: static int32_t michael@0: _deleteVariant(char* variants, int32_t variantsLen, michael@0: const char* toDelete, int32_t toDeleteLen) michael@0: { michael@0: int32_t delta = 0; /* number of chars deleted */ michael@0: for (;;) { michael@0: UBool flag = FALSE; michael@0: if (variantsLen < toDeleteLen) { michael@0: return delta; michael@0: } michael@0: if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && michael@0: (variantsLen == toDeleteLen || michael@0: (flag=(variants[toDeleteLen] == '_')))) michael@0: { michael@0: int32_t d = toDeleteLen + (flag?1:0); michael@0: variantsLen -= d; michael@0: delta += d; michael@0: if (variantsLen > 0) { michael@0: uprv_memmove(variants, variants+d, variantsLen); michael@0: } michael@0: } else { michael@0: char* p = _strnchr(variants, variantsLen, '_'); michael@0: if (p == NULL) { michael@0: return delta; michael@0: } michael@0: ++p; michael@0: variantsLen -= (int32_t)(p - variants); michael@0: variants = p; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* Keyword enumeration */ michael@0: michael@0: typedef struct UKeywordsContext { michael@0: char* keywords; michael@0: char* current; michael@0: } UKeywordsContext; michael@0: michael@0: static void U_CALLCONV michael@0: uloc_kw_closeKeywords(UEnumeration *enumerator) { michael@0: uprv_free(((UKeywordsContext *)enumerator->context)->keywords); michael@0: uprv_free(enumerator->context); michael@0: uprv_free(enumerator); michael@0: } michael@0: michael@0: static int32_t U_CALLCONV michael@0: uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { michael@0: char *kw = ((UKeywordsContext *)en->context)->keywords; michael@0: int32_t result = 0; michael@0: while(*kw) { michael@0: result++; michael@0: kw += uprv_strlen(kw)+1; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: static const char* U_CALLCONV michael@0: uloc_kw_nextKeyword(UEnumeration* en, michael@0: int32_t* resultLength, michael@0: UErrorCode* /*status*/) { michael@0: const char* result = ((UKeywordsContext *)en->context)->current; michael@0: int32_t len = 0; michael@0: if(*result) { michael@0: len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); michael@0: ((UKeywordsContext *)en->context)->current += len+1; michael@0: } else { michael@0: result = NULL; michael@0: } michael@0: if (resultLength) { michael@0: *resultLength = len; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: static void U_CALLCONV michael@0: uloc_kw_resetKeywords(UEnumeration* en, michael@0: UErrorCode* /*status*/) { michael@0: ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; michael@0: } michael@0: michael@0: static const UEnumeration gKeywordsEnum = { michael@0: NULL, michael@0: NULL, michael@0: uloc_kw_closeKeywords, michael@0: uloc_kw_countKeywords, michael@0: uenum_unextDefault, michael@0: uloc_kw_nextKeyword, michael@0: uloc_kw_resetKeywords michael@0: }; michael@0: michael@0: U_CAPI UEnumeration* U_EXPORT2 michael@0: uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) michael@0: { michael@0: UKeywordsContext *myContext = NULL; michael@0: UEnumeration *result = NULL; michael@0: michael@0: if(U_FAILURE(*status)) { michael@0: return NULL; michael@0: } michael@0: result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); michael@0: /* Null pointer test */ michael@0: if (result == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); michael@0: myContext = static_cast(uprv_malloc(sizeof(UKeywordsContext))); michael@0: if (myContext == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(result); michael@0: return NULL; michael@0: } michael@0: myContext->keywords = (char *)uprv_malloc(keywordListSize+1); michael@0: uprv_memcpy(myContext->keywords, keywordList, keywordListSize); michael@0: myContext->keywords[keywordListSize] = 0; michael@0: myContext->current = myContext->keywords; michael@0: result->context = myContext; michael@0: return result; michael@0: } michael@0: michael@0: U_CAPI UEnumeration* U_EXPORT2 michael@0: uloc_openKeywords(const char* localeID, michael@0: UErrorCode* status) michael@0: { michael@0: int32_t i=0; michael@0: char keywords[256]; michael@0: int32_t keywordsCapacity = 256; michael@0: char tempBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: const char* tmpLocaleID; michael@0: michael@0: if(status==NULL || U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: if (_hasBCP47Extension(localeID)) { michael@0: _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); michael@0: } else { michael@0: if (localeID==NULL) { michael@0: localeID=uloc_getDefault(); michael@0: } michael@0: tmpLocaleID=localeID; michael@0: } michael@0: michael@0: /* Skip the language */ michael@0: ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); michael@0: if(_isIDSeparator(*tmpLocaleID)) { michael@0: const char *scriptID; michael@0: /* Skip the script if available */ michael@0: ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); michael@0: if(scriptID != tmpLocaleID+1) { michael@0: /* Found optional script */ michael@0: tmpLocaleID = scriptID; michael@0: } michael@0: /* Skip the Country */ michael@0: if (_isIDSeparator(*tmpLocaleID)) { michael@0: ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); michael@0: if(_isIDSeparator(*tmpLocaleID)) { michael@0: _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* keywords are located after '@' */ michael@0: if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { michael@0: i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); michael@0: } michael@0: michael@0: if(i) { michael@0: return uloc_openKeywordList(keywords, i, status); michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: michael@0: /* bit-flags for 'options' parameter of _canonicalize */ michael@0: #define _ULOC_STRIP_KEYWORDS 0x2 michael@0: #define _ULOC_CANONICALIZE 0x1 michael@0: michael@0: #define OPTION_SET(options, mask) ((options & mask) != 0) michael@0: michael@0: static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; michael@0: #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0]) michael@0: michael@0: /** michael@0: * Canonicalize the given localeID, to level 1 or to level 2, michael@0: * depending on the options. To specify level 1, pass in options=0. michael@0: * To specify level 2, pass in options=_ULOC_CANONICALIZE. michael@0: * michael@0: * This is the code underlying uloc_getName and uloc_canonicalize. michael@0: */ michael@0: static int32_t michael@0: _canonicalize(const char* localeID, michael@0: char* result, michael@0: int32_t resultCapacity, michael@0: uint32_t options, michael@0: UErrorCode* err) { michael@0: int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; michael@0: char localeBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: char tempBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: const char* origLocaleID; michael@0: const char* tmpLocaleID; michael@0: const char* keywordAssign = NULL; michael@0: const char* separatorIndicator = NULL; michael@0: const char* addKeyword = NULL; michael@0: const char* addValue = NULL; michael@0: char* name; michael@0: char* variant = NULL; /* pointer into name, or NULL */ michael@0: michael@0: if (U_FAILURE(*err)) { michael@0: return 0; michael@0: } michael@0: michael@0: if (_hasBCP47Extension(localeID)) { michael@0: _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); michael@0: } else { michael@0: if (localeID==NULL) { michael@0: localeID=uloc_getDefault(); michael@0: } michael@0: tmpLocaleID=localeID; michael@0: } michael@0: michael@0: origLocaleID=tmpLocaleID; michael@0: michael@0: /* if we are doing a full canonicalization, then put results in michael@0: localeBuffer, if necessary; otherwise send them to result. */ michael@0: if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ michael@0: (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) { michael@0: name = localeBuffer; michael@0: nameCapacity = (int32_t)sizeof(localeBuffer); michael@0: } else { michael@0: name = result; michael@0: nameCapacity = resultCapacity; michael@0: } michael@0: michael@0: /* get all pieces, one after another, and separate with '_' */ michael@0: len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); michael@0: michael@0: if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { michael@0: const char *d = uloc_getDefault(); michael@0: michael@0: len = (int32_t)uprv_strlen(d); michael@0: michael@0: if (name != NULL) { michael@0: uprv_strncpy(name, d, len); michael@0: } michael@0: } else if(_isIDSeparator(*tmpLocaleID)) { michael@0: const char *scriptID; michael@0: michael@0: ++fieldCount; michael@0: if(len 0) { michael@0: /* Found optional script */ michael@0: tmpLocaleID = scriptID; michael@0: ++fieldCount; michael@0: len+=scriptSize; michael@0: if (_isIDSeparator(*tmpLocaleID)) { michael@0: /* If there is something else, then we add the _ */ michael@0: if(len 0) { michael@0: /* Found optional country */ michael@0: tmpLocaleID = cntryID; michael@0: len+=cntrySize; michael@0: } michael@0: if(_isIDSeparator(*tmpLocaleID)) { michael@0: /* If there is something else, then we add the _ if we found country before. */ michael@0: if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { michael@0: ++fieldCount; michael@0: if(len 0) { michael@0: variant = len 0)) { michael@0: do { michael@0: if(len 0)); michael@0: if (posixVariantSize > 0) { michael@0: if (variant == NULL) { michael@0: variant = name+len; michael@0: } michael@0: len += posixVariantSize; michael@0: variantSize += posixVariantSize; michael@0: } michael@0: } michael@0: michael@0: /* Handle generic variants first */ michael@0: if (variant) { michael@0: for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) { michael@0: const char* variantToCompare = VARIANT_MAP[j].variant; michael@0: int32_t n = (int32_t)uprv_strlen(variantToCompare); michael@0: int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); michael@0: len -= variantLen; michael@0: if (variantLen > 0) { michael@0: if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ michael@0: --len; michael@0: } michael@0: addKeyword = VARIANT_MAP[j].keyword; michael@0: addValue = VARIANT_MAP[j].value; michael@0: break; michael@0: } michael@0: } michael@0: if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ michael@0: --len; michael@0: } michael@0: } michael@0: michael@0: /* Look up the ID in the canonicalization map */ michael@0: for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) { michael@0: const char* id = CANONICALIZE_MAP[j].id; michael@0: int32_t n = (int32_t)uprv_strlen(id); michael@0: if (len == n && uprv_strncmp(name, id, n) == 0) { michael@0: if (n == 0 && tmpLocaleID != NULL) { michael@0: break; /* Don't remap "" if keywords present */ michael@0: } michael@0: len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); michael@0: if (CANONICALIZE_MAP[j].keyword) { michael@0: addKeyword = CANONICALIZE_MAP[j].keyword; michael@0: addValue = CANONICALIZE_MAP[j].value; michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { michael@0: if (tmpLocaleID!=NULL && keywordAssign!=NULL && michael@0: (!separatorIndicator || separatorIndicator > keywordAssign)) { michael@0: if(len 0) { michael@0: collVal[len] = 0; michael@0: michael@0: len = uloc_getBaseName(localeID, tmpLocaleID, michael@0: sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status); michael@0: michael@0: if (U_SUCCESS(status)) { michael@0: tmpLocaleID[len] = 0; michael@0: michael@0: len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, michael@0: sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status); michael@0: michael@0: if (U_SUCCESS(status)) { michael@0: tmpLocaleID[len] = 0; michael@0: return uprv_convertToLCID(langID, tmpLocaleID, &status); michael@0: } michael@0: } michael@0: } michael@0: michael@0: // fall through - all keywords are simply ignored michael@0: status = U_ZERO_ERROR; michael@0: } michael@0: michael@0: return uprv_convertToLCID(langID, localeID, &status); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, michael@0: UErrorCode *status) michael@0: { michael@0: return uprv_convertToPosix(hostid, locale, localeCapacity, status); michael@0: } michael@0: michael@0: /* ### Default locale **************************************************/ michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: uloc_getDefault() michael@0: { michael@0: return locale_get_default(); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: uloc_setDefault(const char* newDefaultLocale, michael@0: UErrorCode* err) michael@0: { michael@0: if (U_FAILURE(*err)) michael@0: return; michael@0: /* the error code isn't currently used for anything by this function*/ michael@0: michael@0: /* propagate change to C++ */ michael@0: locale_set_default(newDefaultLocale); michael@0: } michael@0: michael@0: /** michael@0: * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer michael@0: * to an array of pointers to arrays of char. All of these pointers are owned michael@0: * by ICU-- do not delete them, and do not write through them. The array is michael@0: * terminated with a null pointer. michael@0: */ michael@0: U_CAPI const char* const* U_EXPORT2 michael@0: uloc_getISOLanguages() michael@0: { michael@0: return LANGUAGES; michael@0: } michael@0: michael@0: /** michael@0: * Returns a list of all 2-letter country codes defined in ISO 639. This is a michael@0: * pointer to an array of pointers to arrays of char. All of these pointers are michael@0: * owned by ICU-- do not delete them, and do not write through them. The array is michael@0: * terminated with a null pointer. michael@0: */ michael@0: U_CAPI const char* const* U_EXPORT2 michael@0: uloc_getISOCountries() michael@0: { michael@0: return COUNTRIES; michael@0: } michael@0: michael@0: michael@0: /* this function to be moved into cstring.c later */ michael@0: static char gDecimal = 0; michael@0: michael@0: static /* U_CAPI */ michael@0: double michael@0: /* U_EXPORT2 */ michael@0: _uloc_strtod(const char *start, char **end) { michael@0: char *decimal; michael@0: char *myEnd; michael@0: char buf[30]; michael@0: double rv; michael@0: if (!gDecimal) { michael@0: char rep[5]; michael@0: /* For machines that decide to change the decimal on you, michael@0: and try to be too smart with localization. michael@0: This normally should be just a '.'. */ michael@0: sprintf(rep, "%+1.1f", 1.0); michael@0: gDecimal = rep[2]; michael@0: } michael@0: michael@0: if(gDecimal == '.') { michael@0: return uprv_strtod(start, end); /* fall through to OS */ michael@0: } else { michael@0: uprv_strncpy(buf, start, 29); michael@0: buf[29]=0; michael@0: decimal = uprv_strchr(buf, '.'); michael@0: if(decimal) { michael@0: *decimal = gDecimal; michael@0: } else { michael@0: return uprv_strtod(start, end); /* no decimal point */ michael@0: } michael@0: rv = uprv_strtod(buf, &myEnd); michael@0: if(end) { michael@0: *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ michael@0: } michael@0: return rv; michael@0: } michael@0: } michael@0: michael@0: typedef struct { michael@0: float q; michael@0: int32_t dummy; /* to avoid uninitialized memory copy from qsort */ michael@0: char *locale; michael@0: } _acceptLangItem; michael@0: michael@0: static int32_t U_CALLCONV michael@0: uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) michael@0: { michael@0: const _acceptLangItem *aa = (const _acceptLangItem*)a; michael@0: const _acceptLangItem *bb = (const _acceptLangItem*)b; michael@0: michael@0: int32_t rc = 0; michael@0: if(bb->q < aa->q) { michael@0: rc = -1; /* A > B */ michael@0: } else if(bb->q > aa->q) { michael@0: rc = 1; /* A < B */ michael@0: } else { michael@0: rc = 0; /* A = B */ michael@0: } michael@0: michael@0: if(rc==0) { michael@0: rc = uprv_stricmp(aa->locale, bb->locale); michael@0: } michael@0: michael@0: #if defined(ULOC_DEBUG) michael@0: /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", michael@0: aa->locale, aa->q, michael@0: bb->locale, bb->q, michael@0: rc);*/ michael@0: #endif michael@0: michael@0: return rc; michael@0: } michael@0: michael@0: /* michael@0: mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 michael@0: */ michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, michael@0: const char *httpAcceptLanguage, michael@0: UEnumeration* availableLocales, michael@0: UErrorCode *status) michael@0: { michael@0: _acceptLangItem *j; michael@0: _acceptLangItem smallBuffer[30]; michael@0: char **strs; michael@0: char tmp[ULOC_FULLNAME_CAPACITY +1]; michael@0: int32_t n = 0; michael@0: const char *itemEnd; michael@0: const char *paramEnd; michael@0: const char *s; michael@0: const char *t; michael@0: int32_t res; michael@0: int32_t i; michael@0: int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); michael@0: int32_t jSize; michael@0: char *tempstr; /* Use for null pointer check */ michael@0: michael@0: j = smallBuffer; michael@0: jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); michael@0: if(U_FAILURE(*status)) { michael@0: return -1; michael@0: } michael@0: michael@0: for(s=httpAcceptLanguage;s&&*s;) { michael@0: while(isspace(*s)) /* eat space at the beginning */ michael@0: s++; michael@0: itemEnd=uprv_strchr(s,','); michael@0: paramEnd=uprv_strchr(s,';'); michael@0: if(!itemEnd) { michael@0: itemEnd = httpAcceptLanguage+l; /* end of string */ michael@0: } michael@0: if(paramEnd && paramEnds)&&isspace(*t);t--) michael@0: ; michael@0: /* Check for null pointer from uprv_strndup */ michael@0: tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); michael@0: if (tempstr == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return -1; michael@0: } michael@0: j[n].locale = tempstr; michael@0: uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); michael@0: if(strcmp(j[n].locale,tmp)) { michael@0: uprv_free(j[n].locale); michael@0: j[n].locale=uprv_strdup(tmp); michael@0: } michael@0: #if defined(ULOC_DEBUG) michael@0: /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ michael@0: #endif michael@0: n++; michael@0: s = itemEnd; michael@0: while(*s==',') { /* eat duplicate commas */ michael@0: s++; michael@0: } michael@0: if(n>=jSize) { michael@0: if(j==smallBuffer) { /* overflowed the small buffer. */ michael@0: j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2))); michael@0: if(j!=NULL) { michael@0: uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); michael@0: } michael@0: #if defined(ULOC_DEBUG) michael@0: fprintf(stderr,"malloced at size %d\n", jSize); michael@0: #endif michael@0: } else { michael@0: j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2)); michael@0: #if defined(ULOC_DEBUG) michael@0: fprintf(stderr,"re-alloced at size %d\n", jSize); michael@0: #endif michael@0: } michael@0: jSize *= 2; michael@0: if(j==NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return -1; michael@0: } michael@0: } michael@0: } michael@0: uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); michael@0: if(U_FAILURE(*status)) { michael@0: if(j != smallBuffer) { michael@0: #if defined(ULOC_DEBUG) michael@0: fprintf(stderr,"freeing j %p\n", j); michael@0: #endif michael@0: uprv_free(j); michael@0: } michael@0: return -1; michael@0: } michael@0: strs = static_cast(uprv_malloc((size_t)(sizeof(strs[0])*n))); michael@0: /* Check for null pointer */ michael@0: if (strs == NULL) { michael@0: uprv_free(j); /* Free to avoid memory leak */ michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return -1; michael@0: } michael@0: for(i=0;i q <%g>\n", i, j[i].locale, j[i].q);*/ michael@0: #endif michael@0: strs[i]=j[i].locale; michael@0: } michael@0: res = uloc_acceptLanguage(result, resultAvailable, outResult, michael@0: (const char**)strs, n, availableLocales, status); michael@0: for(i=0;i(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); michael@0: if(fallbackList==NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return -1; michael@0: } michael@0: for(i=0;i0) { michael@0: uprv_strncpy(result, l, uprv_min(len, resultAvailable)); michael@0: } michael@0: for(j=0;jmaxLen) { michael@0: maxLen = len; michael@0: } michael@0: } michael@0: uenum_reset(availableLocales, status); michael@0: /* save off parent info */ michael@0: if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { michael@0: fallbackList[i] = uprv_strdup(tmp); michael@0: } else { michael@0: fallbackList[i]=0; michael@0: } michael@0: } michael@0: michael@0: for(maxLen--;maxLen>0;maxLen--) { michael@0: for(i=0;i0) { michael@0: uprv_strncpy(result, l, uprv_min(len, resultAvailable)); michael@0: } michael@0: for(j=0;j