intl/icu/source/common/locutil.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/locutil.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,265 @@
     1.4 +/*
     1.5 + *******************************************************************************
     1.6 + * Copyright (C) 2002-2011, International Business Machines Corporation and
     1.7 + * others. All Rights Reserved.
     1.8 + *******************************************************************************
     1.9 + */
    1.10 +#include "unicode/utypes.h"
    1.11 +
    1.12 +#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
    1.13 +
    1.14 +#include "unicode/resbund.h"
    1.15 +#include "cmemory.h"
    1.16 +#include "ustrfmt.h"
    1.17 +#include "locutil.h"
    1.18 +#include "charstr.h"
    1.19 +#include "ucln_cmn.h"
    1.20 +#include "uassert.h"
    1.21 +#include "umutex.h"
    1.22 +
    1.23 +// see LocaleUtility::getAvailableLocaleNames
    1.24 +static icu::Hashtable * LocaleUtility_cache = NULL;
    1.25 +
    1.26 +#define UNDERSCORE_CHAR ((UChar)0x005f)
    1.27 +#define AT_SIGN_CHAR    ((UChar)64)
    1.28 +#define PERIOD_CHAR     ((UChar)46)
    1.29 +
    1.30 +/*
    1.31 + ******************************************************************
    1.32 + */
    1.33 +
    1.34 +/**
    1.35 + * Release all static memory held by Locale Utility.  
    1.36 + */
    1.37 +U_CDECL_BEGIN
    1.38 +static UBool U_CALLCONV service_cleanup(void) {
    1.39 +    if (LocaleUtility_cache) {
    1.40 +        delete LocaleUtility_cache;
    1.41 +        LocaleUtility_cache = NULL;
    1.42 +    }
    1.43 +    return TRUE;
    1.44 +}
    1.45 +U_CDECL_END
    1.46 +
    1.47 +U_NAMESPACE_BEGIN
    1.48 +
    1.49 +UnicodeString&
    1.50 +LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
    1.51 +{
    1.52 +  if (id == NULL) {
    1.53 +    result.setToBogus();
    1.54 +  } else {
    1.55 +    // Fix case only (no other changes) up to the first '@' or '.' or
    1.56 +    // end of string, whichever comes first.  In 3.0 I changed this to
    1.57 +    // stop at first '@' or '.'.  It used to run out to the end of
    1.58 +    // string.  My fix makes the tests pass but is probably
    1.59 +    // structurally incorrect.  See below.  [alan 3.0]
    1.60 +
    1.61 +    // TODO: Doug, you might want to revise this...
    1.62 +    result = *id;
    1.63 +    int32_t i = 0;
    1.64 +    int32_t end = result.indexOf(AT_SIGN_CHAR);
    1.65 +    int32_t n = result.indexOf(PERIOD_CHAR);
    1.66 +    if (n >= 0 && n < end) {
    1.67 +        end = n;
    1.68 +    }
    1.69 +    if (end < 0) {
    1.70 +        end = result.length();
    1.71 +    }
    1.72 +    n = result.indexOf(UNDERSCORE_CHAR);
    1.73 +    if (n < 0) {
    1.74 +      n = end;
    1.75 +    }
    1.76 +    for (; i < n; ++i) {
    1.77 +      UChar c = result.charAt(i);
    1.78 +      if (c >= 0x0041 && c <= 0x005a) {
    1.79 +        c += 0x20;
    1.80 +        result.setCharAt(i, c);
    1.81 +      }
    1.82 +    }
    1.83 +    for (n = end; i < n; ++i) {
    1.84 +      UChar c = result.charAt(i);
    1.85 +      if (c >= 0x0061 && c <= 0x007a) {
    1.86 +        c -= 0x20;
    1.87 +        result.setCharAt(i, c);
    1.88 +      }
    1.89 +    }
    1.90 +  }
    1.91 +  return result;
    1.92 +
    1.93 +#if 0
    1.94 +    // This code does a proper full level 2 canonicalization of id.
    1.95 +    // It's nasty to go from UChar to char to char to UChar -- but
    1.96 +    // that's what you have to do to use the uloc_canonicalize
    1.97 +    // function on UnicodeStrings.
    1.98 +
    1.99 +    // I ended up doing the alternate fix (see above) not for
   1.100 +    // performance reasons, although performance will certainly be
   1.101 +    // better, but because doing a full level 2 canonicalization
   1.102 +    // causes some tests to fail.  [alan 3.0]
   1.103 +
   1.104 +    // TODO: Doug, you might want to revisit this...
   1.105 +    result.setToBogus();
   1.106 +    if (id != 0) {
   1.107 +        int32_t buflen = id->length() + 8; // space for NUL
   1.108 +        char* buf = (char*) uprv_malloc(buflen);
   1.109 +        char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
   1.110 +        if (buf != 0 && canon != 0) {
   1.111 +            U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
   1.112 +            UErrorCode ec = U_ZERO_ERROR;
   1.113 +            uloc_canonicalize(buf, canon, buflen, &ec);
   1.114 +            if (U_SUCCESS(ec)) {
   1.115 +                result = UnicodeString(canon);
   1.116 +            }
   1.117 +        }
   1.118 +        uprv_free(buf);
   1.119 +        uprv_free(canon);
   1.120 +    }
   1.121 +    return result;
   1.122 +#endif
   1.123 +}
   1.124 +
   1.125 +Locale&
   1.126 +LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
   1.127 +{
   1.128 +    enum { BUFLEN = 128 }; // larger than ever needed
   1.129 +
   1.130 +    if (id.isBogus() || id.length() >= BUFLEN) {
   1.131 +        result.setToBogus();
   1.132 +    } else {
   1.133 +        /*
   1.134 +         * We need to convert from a UnicodeString to char * in order to
   1.135 +         * create a Locale.
   1.136 +         *
   1.137 +         * Problem: Locale ID strings may contain '@' which is a variant
   1.138 +         * character and cannot be handled by invariant-character conversion.
   1.139 +         *
   1.140 +         * Hack: Since ICU code can handle locale IDs with multiple encodings
   1.141 +         * of '@' (at least for EBCDIC; it's not known to be a problem for
   1.142 +         * ASCII-based systems),
   1.143 +         * we use regular invariant-character conversion for everything else
   1.144 +         * and manually convert U+0040 into a compiler-char-constant '@'.
   1.145 +         * While this compilation-time constant may not match the runtime
   1.146 +         * encoding of '@', it should be one of the encodings which ICU
   1.147 +         * recognizes.
   1.148 +         *
   1.149 +         * There should be only at most one '@' in a locale ID.
   1.150 +         */
   1.151 +        char buffer[BUFLEN];
   1.152 +        int32_t prev, i;
   1.153 +        prev = 0;
   1.154 +        for(;;) {
   1.155 +            i = id.indexOf((UChar)0x40, prev);
   1.156 +            if(i < 0) {
   1.157 +                // no @ between prev and the rest of the string
   1.158 +                id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
   1.159 +                break; // done
   1.160 +            } else {
   1.161 +                // normal invariant-character conversion for text between @s
   1.162 +                id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
   1.163 +                // manually "convert" U+0040 at id[i] into '@' at buffer[i]
   1.164 +                buffer[i] = '@';
   1.165 +                prev = i + 1;
   1.166 +            }
   1.167 +        }
   1.168 +        result = Locale::createFromName(buffer);
   1.169 +    }
   1.170 +    return result;
   1.171 +}
   1.172 +
   1.173 +UnicodeString&
   1.174 +LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
   1.175 +{
   1.176 +    if (locale.isBogus()) {
   1.177 +        result.setToBogus();
   1.178 +    } else {
   1.179 +        result.append(UnicodeString(locale.getName(), -1, US_INV));
   1.180 +    }
   1.181 +    return result;
   1.182 +}
   1.183 +
   1.184 +const Hashtable*
   1.185 +LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
   1.186 +{
   1.187 +    // LocaleUtility_cache is a hash-of-hashes.  The top-level keys
   1.188 +    // are path strings ('bundleID') passed to
   1.189 +    // ures_openAvailableLocales.  The top-level values are
   1.190 +    // second-level hashes.  The second-level keys are result strings
   1.191 +    // from ures_openAvailableLocales.  The second-level values are
   1.192 +    // garbage ((void*)1 or other random pointer).
   1.193 +
   1.194 +    UErrorCode status = U_ZERO_ERROR;
   1.195 +    Hashtable* cache;
   1.196 +    umtx_lock(NULL);
   1.197 +    cache = LocaleUtility_cache;
   1.198 +    umtx_unlock(NULL);
   1.199 +
   1.200 +    if (cache == NULL) {
   1.201 +        cache = new Hashtable(status);
   1.202 +        if (cache == NULL || U_FAILURE(status)) {
   1.203 +            return NULL; // catastrophic failure; e.g. out of memory
   1.204 +        }
   1.205 +        cache->setValueDeleter(uhash_deleteHashtable);
   1.206 +        Hashtable* h; // set this to final LocaleUtility_cache value
   1.207 +        umtx_lock(NULL);
   1.208 +        h = LocaleUtility_cache;
   1.209 +        if (h == NULL) {
   1.210 +            LocaleUtility_cache = h = cache;
   1.211 +            cache = NULL;
   1.212 +            ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
   1.213 +        }
   1.214 +        umtx_unlock(NULL);
   1.215 +        if(cache != NULL) {
   1.216 +          delete cache;
   1.217 +        }
   1.218 +        cache = h;
   1.219 +    }
   1.220 +
   1.221 +    U_ASSERT(cache != NULL);
   1.222 +
   1.223 +    Hashtable* htp;
   1.224 +    umtx_lock(NULL);
   1.225 +    htp = (Hashtable*) cache->get(bundleID);
   1.226 +    umtx_unlock(NULL);
   1.227 +
   1.228 +    if (htp == NULL) {
   1.229 +        htp = new Hashtable(status);
   1.230 +        if (htp && U_SUCCESS(status)) {
   1.231 +            CharString cbundleID;
   1.232 +            cbundleID.appendInvariantChars(bundleID, status);
   1.233 +            const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
   1.234 +            UEnumeration *uenum = ures_openAvailableLocales(path, &status);
   1.235 +            for (;;) {
   1.236 +                const UChar* id = uenum_unext(uenum, NULL, &status);
   1.237 +                if (id == NULL) {
   1.238 +                    break;
   1.239 +                }
   1.240 +                htp->put(UnicodeString(id), (void*)htp, status);
   1.241 +            }
   1.242 +            uenum_close(uenum);
   1.243 +            if (U_FAILURE(status)) {
   1.244 +                delete htp;
   1.245 +                return NULL;
   1.246 +            }
   1.247 +            umtx_lock(NULL);
   1.248 +            cache->put(bundleID, (void*)htp, status);
   1.249 +            umtx_unlock(NULL);
   1.250 +        }
   1.251 +    }
   1.252 +    return htp;
   1.253 +}
   1.254 +
   1.255 +UBool
   1.256 +LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
   1.257 +{
   1.258 +    return child.indexOf(root) == 0 &&
   1.259 +      (child.length() == root.length() ||
   1.260 +       child.charAt(root.length()) == UNDERSCORE_CHAR);
   1.261 +}
   1.262 +
   1.263 +U_NAMESPACE_END
   1.264 +
   1.265 +/* !UCONFIG_NO_SERVICE */
   1.266 +#endif
   1.267 +
   1.268 +

mercurial