Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2002-2011, International Business Machines Corporation and |
michael@0 | 4 | * others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | */ |
michael@0 | 7 | #include "unicode/utypes.h" |
michael@0 | 8 | |
michael@0 | 9 | #if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION |
michael@0 | 10 | |
michael@0 | 11 | #include "unicode/resbund.h" |
michael@0 | 12 | #include "cmemory.h" |
michael@0 | 13 | #include "ustrfmt.h" |
michael@0 | 14 | #include "locutil.h" |
michael@0 | 15 | #include "charstr.h" |
michael@0 | 16 | #include "ucln_cmn.h" |
michael@0 | 17 | #include "uassert.h" |
michael@0 | 18 | #include "umutex.h" |
michael@0 | 19 | |
michael@0 | 20 | // see LocaleUtility::getAvailableLocaleNames |
michael@0 | 21 | static icu::Hashtable * LocaleUtility_cache = NULL; |
michael@0 | 22 | |
michael@0 | 23 | #define UNDERSCORE_CHAR ((UChar)0x005f) |
michael@0 | 24 | #define AT_SIGN_CHAR ((UChar)64) |
michael@0 | 25 | #define PERIOD_CHAR ((UChar)46) |
michael@0 | 26 | |
michael@0 | 27 | /* |
michael@0 | 28 | ****************************************************************** |
michael@0 | 29 | */ |
michael@0 | 30 | |
michael@0 | 31 | /** |
michael@0 | 32 | * Release all static memory held by Locale Utility. |
michael@0 | 33 | */ |
michael@0 | 34 | U_CDECL_BEGIN |
michael@0 | 35 | static UBool U_CALLCONV service_cleanup(void) { |
michael@0 | 36 | if (LocaleUtility_cache) { |
michael@0 | 37 | delete LocaleUtility_cache; |
michael@0 | 38 | LocaleUtility_cache = NULL; |
michael@0 | 39 | } |
michael@0 | 40 | return TRUE; |
michael@0 | 41 | } |
michael@0 | 42 | U_CDECL_END |
michael@0 | 43 | |
michael@0 | 44 | U_NAMESPACE_BEGIN |
michael@0 | 45 | |
michael@0 | 46 | UnicodeString& |
michael@0 | 47 | LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result) |
michael@0 | 48 | { |
michael@0 | 49 | if (id == NULL) { |
michael@0 | 50 | result.setToBogus(); |
michael@0 | 51 | } else { |
michael@0 | 52 | // Fix case only (no other changes) up to the first '@' or '.' or |
michael@0 | 53 | // end of string, whichever comes first. In 3.0 I changed this to |
michael@0 | 54 | // stop at first '@' or '.'. It used to run out to the end of |
michael@0 | 55 | // string. My fix makes the tests pass but is probably |
michael@0 | 56 | // structurally incorrect. See below. [alan 3.0] |
michael@0 | 57 | |
michael@0 | 58 | // TODO: Doug, you might want to revise this... |
michael@0 | 59 | result = *id; |
michael@0 | 60 | int32_t i = 0; |
michael@0 | 61 | int32_t end = result.indexOf(AT_SIGN_CHAR); |
michael@0 | 62 | int32_t n = result.indexOf(PERIOD_CHAR); |
michael@0 | 63 | if (n >= 0 && n < end) { |
michael@0 | 64 | end = n; |
michael@0 | 65 | } |
michael@0 | 66 | if (end < 0) { |
michael@0 | 67 | end = result.length(); |
michael@0 | 68 | } |
michael@0 | 69 | n = result.indexOf(UNDERSCORE_CHAR); |
michael@0 | 70 | if (n < 0) { |
michael@0 | 71 | n = end; |
michael@0 | 72 | } |
michael@0 | 73 | for (; i < n; ++i) { |
michael@0 | 74 | UChar c = result.charAt(i); |
michael@0 | 75 | if (c >= 0x0041 && c <= 0x005a) { |
michael@0 | 76 | c += 0x20; |
michael@0 | 77 | result.setCharAt(i, c); |
michael@0 | 78 | } |
michael@0 | 79 | } |
michael@0 | 80 | for (n = end; i < n; ++i) { |
michael@0 | 81 | UChar c = result.charAt(i); |
michael@0 | 82 | if (c >= 0x0061 && c <= 0x007a) { |
michael@0 | 83 | c -= 0x20; |
michael@0 | 84 | result.setCharAt(i, c); |
michael@0 | 85 | } |
michael@0 | 86 | } |
michael@0 | 87 | } |
michael@0 | 88 | return result; |
michael@0 | 89 | |
michael@0 | 90 | #if 0 |
michael@0 | 91 | // This code does a proper full level 2 canonicalization of id. |
michael@0 | 92 | // It's nasty to go from UChar to char to char to UChar -- but |
michael@0 | 93 | // that's what you have to do to use the uloc_canonicalize |
michael@0 | 94 | // function on UnicodeStrings. |
michael@0 | 95 | |
michael@0 | 96 | // I ended up doing the alternate fix (see above) not for |
michael@0 | 97 | // performance reasons, although performance will certainly be |
michael@0 | 98 | // better, but because doing a full level 2 canonicalization |
michael@0 | 99 | // causes some tests to fail. [alan 3.0] |
michael@0 | 100 | |
michael@0 | 101 | // TODO: Doug, you might want to revisit this... |
michael@0 | 102 | result.setToBogus(); |
michael@0 | 103 | if (id != 0) { |
michael@0 | 104 | int32_t buflen = id->length() + 8; // space for NUL |
michael@0 | 105 | char* buf = (char*) uprv_malloc(buflen); |
michael@0 | 106 | char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen); |
michael@0 | 107 | if (buf != 0 && canon != 0) { |
michael@0 | 108 | U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen); |
michael@0 | 109 | UErrorCode ec = U_ZERO_ERROR; |
michael@0 | 110 | uloc_canonicalize(buf, canon, buflen, &ec); |
michael@0 | 111 | if (U_SUCCESS(ec)) { |
michael@0 | 112 | result = UnicodeString(canon); |
michael@0 | 113 | } |
michael@0 | 114 | } |
michael@0 | 115 | uprv_free(buf); |
michael@0 | 116 | uprv_free(canon); |
michael@0 | 117 | } |
michael@0 | 118 | return result; |
michael@0 | 119 | #endif |
michael@0 | 120 | } |
michael@0 | 121 | |
michael@0 | 122 | Locale& |
michael@0 | 123 | LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result) |
michael@0 | 124 | { |
michael@0 | 125 | enum { BUFLEN = 128 }; // larger than ever needed |
michael@0 | 126 | |
michael@0 | 127 | if (id.isBogus() || id.length() >= BUFLEN) { |
michael@0 | 128 | result.setToBogus(); |
michael@0 | 129 | } else { |
michael@0 | 130 | /* |
michael@0 | 131 | * We need to convert from a UnicodeString to char * in order to |
michael@0 | 132 | * create a Locale. |
michael@0 | 133 | * |
michael@0 | 134 | * Problem: Locale ID strings may contain '@' which is a variant |
michael@0 | 135 | * character and cannot be handled by invariant-character conversion. |
michael@0 | 136 | * |
michael@0 | 137 | * Hack: Since ICU code can handle locale IDs with multiple encodings |
michael@0 | 138 | * of '@' (at least for EBCDIC; it's not known to be a problem for |
michael@0 | 139 | * ASCII-based systems), |
michael@0 | 140 | * we use regular invariant-character conversion for everything else |
michael@0 | 141 | * and manually convert U+0040 into a compiler-char-constant '@'. |
michael@0 | 142 | * While this compilation-time constant may not match the runtime |
michael@0 | 143 | * encoding of '@', it should be one of the encodings which ICU |
michael@0 | 144 | * recognizes. |
michael@0 | 145 | * |
michael@0 | 146 | * There should be only at most one '@' in a locale ID. |
michael@0 | 147 | */ |
michael@0 | 148 | char buffer[BUFLEN]; |
michael@0 | 149 | int32_t prev, i; |
michael@0 | 150 | prev = 0; |
michael@0 | 151 | for(;;) { |
michael@0 | 152 | i = id.indexOf((UChar)0x40, prev); |
michael@0 | 153 | if(i < 0) { |
michael@0 | 154 | // no @ between prev and the rest of the string |
michael@0 | 155 | id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV); |
michael@0 | 156 | break; // done |
michael@0 | 157 | } else { |
michael@0 | 158 | // normal invariant-character conversion for text between @s |
michael@0 | 159 | id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV); |
michael@0 | 160 | // manually "convert" U+0040 at id[i] into '@' at buffer[i] |
michael@0 | 161 | buffer[i] = '@'; |
michael@0 | 162 | prev = i + 1; |
michael@0 | 163 | } |
michael@0 | 164 | } |
michael@0 | 165 | result = Locale::createFromName(buffer); |
michael@0 | 166 | } |
michael@0 | 167 | return result; |
michael@0 | 168 | } |
michael@0 | 169 | |
michael@0 | 170 | UnicodeString& |
michael@0 | 171 | LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result) |
michael@0 | 172 | { |
michael@0 | 173 | if (locale.isBogus()) { |
michael@0 | 174 | result.setToBogus(); |
michael@0 | 175 | } else { |
michael@0 | 176 | result.append(UnicodeString(locale.getName(), -1, US_INV)); |
michael@0 | 177 | } |
michael@0 | 178 | return result; |
michael@0 | 179 | } |
michael@0 | 180 | |
michael@0 | 181 | const Hashtable* |
michael@0 | 182 | LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID) |
michael@0 | 183 | { |
michael@0 | 184 | // LocaleUtility_cache is a hash-of-hashes. The top-level keys |
michael@0 | 185 | // are path strings ('bundleID') passed to |
michael@0 | 186 | // ures_openAvailableLocales. The top-level values are |
michael@0 | 187 | // second-level hashes. The second-level keys are result strings |
michael@0 | 188 | // from ures_openAvailableLocales. The second-level values are |
michael@0 | 189 | // garbage ((void*)1 or other random pointer). |
michael@0 | 190 | |
michael@0 | 191 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 192 | Hashtable* cache; |
michael@0 | 193 | umtx_lock(NULL); |
michael@0 | 194 | cache = LocaleUtility_cache; |
michael@0 | 195 | umtx_unlock(NULL); |
michael@0 | 196 | |
michael@0 | 197 | if (cache == NULL) { |
michael@0 | 198 | cache = new Hashtable(status); |
michael@0 | 199 | if (cache == NULL || U_FAILURE(status)) { |
michael@0 | 200 | return NULL; // catastrophic failure; e.g. out of memory |
michael@0 | 201 | } |
michael@0 | 202 | cache->setValueDeleter(uhash_deleteHashtable); |
michael@0 | 203 | Hashtable* h; // set this to final LocaleUtility_cache value |
michael@0 | 204 | umtx_lock(NULL); |
michael@0 | 205 | h = LocaleUtility_cache; |
michael@0 | 206 | if (h == NULL) { |
michael@0 | 207 | LocaleUtility_cache = h = cache; |
michael@0 | 208 | cache = NULL; |
michael@0 | 209 | ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup); |
michael@0 | 210 | } |
michael@0 | 211 | umtx_unlock(NULL); |
michael@0 | 212 | if(cache != NULL) { |
michael@0 | 213 | delete cache; |
michael@0 | 214 | } |
michael@0 | 215 | cache = h; |
michael@0 | 216 | } |
michael@0 | 217 | |
michael@0 | 218 | U_ASSERT(cache != NULL); |
michael@0 | 219 | |
michael@0 | 220 | Hashtable* htp; |
michael@0 | 221 | umtx_lock(NULL); |
michael@0 | 222 | htp = (Hashtable*) cache->get(bundleID); |
michael@0 | 223 | umtx_unlock(NULL); |
michael@0 | 224 | |
michael@0 | 225 | if (htp == NULL) { |
michael@0 | 226 | htp = new Hashtable(status); |
michael@0 | 227 | if (htp && U_SUCCESS(status)) { |
michael@0 | 228 | CharString cbundleID; |
michael@0 | 229 | cbundleID.appendInvariantChars(bundleID, status); |
michael@0 | 230 | const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data(); |
michael@0 | 231 | UEnumeration *uenum = ures_openAvailableLocales(path, &status); |
michael@0 | 232 | for (;;) { |
michael@0 | 233 | const UChar* id = uenum_unext(uenum, NULL, &status); |
michael@0 | 234 | if (id == NULL) { |
michael@0 | 235 | break; |
michael@0 | 236 | } |
michael@0 | 237 | htp->put(UnicodeString(id), (void*)htp, status); |
michael@0 | 238 | } |
michael@0 | 239 | uenum_close(uenum); |
michael@0 | 240 | if (U_FAILURE(status)) { |
michael@0 | 241 | delete htp; |
michael@0 | 242 | return NULL; |
michael@0 | 243 | } |
michael@0 | 244 | umtx_lock(NULL); |
michael@0 | 245 | cache->put(bundleID, (void*)htp, status); |
michael@0 | 246 | umtx_unlock(NULL); |
michael@0 | 247 | } |
michael@0 | 248 | } |
michael@0 | 249 | return htp; |
michael@0 | 250 | } |
michael@0 | 251 | |
michael@0 | 252 | UBool |
michael@0 | 253 | LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child) |
michael@0 | 254 | { |
michael@0 | 255 | return child.indexOf(root) == 0 && |
michael@0 | 256 | (child.length() == root.length() || |
michael@0 | 257 | child.charAt(root.length()) == UNDERSCORE_CHAR); |
michael@0 | 258 | } |
michael@0 | 259 | |
michael@0 | 260 | U_NAMESPACE_END |
michael@0 | 261 | |
michael@0 | 262 | /* !UCONFIG_NO_SERVICE */ |
michael@0 | 263 | #endif |
michael@0 | 264 | |
michael@0 | 265 |