intl/icu/source/i18n/transreg.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/transreg.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1362 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (c) 2001-2011, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   08/10/2001  aliu        Creation.
    1.11 +**********************************************************************
    1.12 +*/
    1.13 +
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_TRANSLITERATION
    1.17 +
    1.18 +#include "unicode/translit.h"
    1.19 +#include "unicode/resbund.h"
    1.20 +#include "unicode/uniset.h"
    1.21 +#include "unicode/uscript.h"
    1.22 +#include "rbt.h"
    1.23 +#include "cpdtrans.h"
    1.24 +#include "nultrans.h"
    1.25 +#include "transreg.h"
    1.26 +#include "rbt_data.h"
    1.27 +#include "rbt_pars.h"
    1.28 +#include "tridpars.h"
    1.29 +#include "charstr.h"
    1.30 +#include "uassert.h"
    1.31 +#include "locutil.h"
    1.32 +
    1.33 +// Enable the following symbol to add debugging code that tracks the
    1.34 +// allocation, deletion, and use of Entry objects.  BoundsChecker has
    1.35 +// reported dangling pointer errors with these objects, but I have
    1.36 +// been unable to confirm them.  I suspect BoundsChecker is getting
    1.37 +// confused with pointers going into and coming out of a UHashtable,
    1.38 +// despite the hinting code that is designed to help it.
    1.39 +// #define DEBUG_MEM
    1.40 +#ifdef DEBUG_MEM
    1.41 +#include <stdio.h>
    1.42 +#endif
    1.43 +
    1.44 +// UChar constants
    1.45 +static const UChar LOCALE_SEP  = 95; // '_' (U+005F); setupNext() cuts a locale spec at its last occurrence
    1.46 +//static const UChar ID_SEP      = 0x002D; /*-*/
    1.47 +//static const UChar VARIANT_SEP = 0x002F; // '/'
    1.48 +
    1.49 +// String constants
    1.50 +static const UChar ANY[] = { 65, 110, 121, 0 }; // "Any" as UChar code units, NUL-terminated
    1.51 +
    1.52 +// Expands to a default-constructed (empty) UnicodeString
    1.53 +#define NO_VARIANT UnicodeString()
    1.54 +
    1.55 +/**
    1.56 + * Resource bundle key for the RuleBasedTransliterator rule.
    1.57 + */
    1.58 +//static const char RB_RULE[] = "Rule";
    1.59 +
    1.60 +U_NAMESPACE_BEGIN
    1.61 +
    1.62 +//------------------------------------------------------------------
    1.63 +// Alias
    1.64 +//------------------------------------------------------------------
    1.65 +
    1.66 +// SIMPLE alias: aliasesOrRules holds the ID that create() later instantiates.
    1.67 +TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID, const UnicodeSet* cpdFilter)
    1.68 +    : ID(),
    1.69 +      aliasesOrRules(theAliasID),
    1.70 +      transes(NULL),                // a simple alias carries no anonymous transliterators
    1.71 +      compoundFilter(cpdFilter),    // create() only clones it; the destructor does not delete it
    1.72 +      direction(UTRANS_FORWARD),
    1.73 +      type(TransliteratorAlias::SIMPLE)
    1.74 +{}
    1.75 +
    1.76 +// COMPOUND alias: ID blocks plus a vector of adopted anonymous transliterators.
    1.77 +TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
    1.78 +                                         const UnicodeString& idBlocks,
    1.79 +                                         UVector* adoptedTransliterators,
    1.80 +                                         const UnicodeSet* cpdFilter)
    1.81 +    : ID(theID),
    1.82 +      aliasesOrRules(idBlocks),           // create() treats each U+FFFF as an anonymous-transliterator slot
    1.83 +      transes(adoptedTransliterators),    // the vector object is deleted by the destructor
    1.84 +      compoundFilter(cpdFilter),
    1.85 +      direction(UTRANS_FORWARD),
    1.86 +      type(TransliteratorAlias::COMPOUND) {}
    1.87 +
    1.88 +// RULES alias: keeps the rule text and direction for a later parse() call.
    1.89 +TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
    1.90 +                                         const UnicodeString& rules,
    1.91 +                                         UTransDirection dir)
    1.92 +    : ID(theID),
    1.93 +      aliasesOrRules(rules),
    1.94 +      transes(NULL),
    1.95 +      compoundFilter(NULL),
    1.96 +      direction(dir),
    1.97 +      type(TransliteratorAlias::RULES) {}
    1.98 +
    1.99 +TransliteratorAlias::~TransliteratorAlias() {
   1.100 +    delete transes; // only the vector is released here; compoundFilter is not deleted by this class
   1.101 +}
   1.102 +
   1.103 +
   1.104 +// Instantiates the transliterator described by a SIMPLE or COMPOUND alias.
   1.105 +// Returns 0 if ec already holds a failure, if an ID cannot be instantiated, or
   1.106 +// on allocation failure.  The COMPOUND path empties aliasesOrRules and transes
   1.107 +// while it works.  RULES aliases are not built here; callers use parse().
   1.108 +Transliterator* TransliteratorAlias::create(UParseError& pe, UErrorCode& ec) {
   1.109 +    if (U_FAILURE(ec)) {
   1.110 +        return 0;
   1.111 +    }
   1.112 +    Transliterator *t = NULL;
   1.113 +    switch (type) {
   1.114 +    case SIMPLE:
   1.115 +        t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
   1.116 +        if (U_FAILURE(ec)) {
   1.117 +            return 0;
   1.118 +        }
   1.119 +        // Never dereference a null result, even if ec reports success.
   1.120 +        if (t != 0 && compoundFilter != 0) {
   1.121 +            t->adoptFilter((UnicodeSet*)compoundFilter->clone());
   1.122 +        }
   1.123 +        break;
   1.124 +    case COMPOUND:
   1.125 +        {
   1.126 +            // aliasesOrRules is a run of ID blocks in which every U+FFFF marks
   1.127 +            // the position of one anonymous transliterator; those are taken,
   1.128 +            // in order, from the front of transes.  Walk the string block by
   1.129 +            // block and interleave both sources into one list.
   1.130 +            const UChar RBT_MARK = (UChar)(0xffff);
   1.131 +            const int32_t anonymousRBTs = transes->size();
   1.132 +            UVector transliterators(ec);
   1.133 +            UnicodeString idBlock;
   1.134 +            int32_t blockSeparatorPos = aliasesOrRules.indexOf(RBT_MARK);
   1.135 +            while (blockSeparatorPos >= 0) {
   1.136 +                aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
   1.137 +                aliasesOrRules.remove(0, blockSeparatorPos + 1);
   1.138 +                if (!idBlock.isEmpty()) {
   1.139 +                    transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
   1.140 +                }
   1.141 +                if (!transes->isEmpty()) {
   1.142 +                    transliterators.addElement(transes->orphanElementAt(0), ec);
   1.143 +                }
   1.144 +                blockSeparatorPos = aliasesOrRules.indexOf(RBT_MARK);
   1.145 +            }
   1.146 +            // Whatever text remains after the last marker is a final ID block.
   1.147 +            if (!aliasesOrRules.isEmpty()) {
   1.148 +                transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
   1.149 +            }
   1.150 +            // Anonymous transliterators that had no marker go at the end.
   1.151 +            while (!transes->isEmpty()) {
   1.152 +                transliterators.addElement(transes->orphanElementAt(0), ec);
   1.153 +            }
   1.154 +            if (U_SUCCESS(ec)) {
   1.155 +                t = new CompoundTransliterator(ID, transliterators,
   1.156 +                    (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
   1.157 +                    anonymousRBTs, pe, ec);
   1.158 +                if (t == 0) {
   1.159 +                    ec = U_MEMORY_ALLOCATION_ERROR;
   1.160 +                }
   1.161 +            }
   1.162 +            if (t == 0) {
   1.163 +                // Nothing took ownership of the collected transliterators
   1.164 +                // (earlier error, or the compound object could not be
   1.165 +                // allocated), so release them here instead of leaking them.
   1.166 +                for (int32_t i = 0; i < transliterators.size(); i++) {
   1.167 +                    delete (Transliterator*)(transliterators.elementAt(i));
   1.168 +                }
   1.169 +            }
   1.170 +        }
   1.171 +        break;
   1.172 +    case RULES:
   1.173 +        U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
   1.174 +        break;
   1.175 +    }
   1.176 +    return t;
   1.177 +}
   1.177 +
   1.178 +UBool TransliteratorAlias::isRuleBased() const {
   1.179 +    return (RULES == type) ? TRUE : FALSE;  // TRUE: the caller must use parse(), not create()
   1.180 +}
   1.181 +
   1.182 +// Hands the stored rule text to `parser`, using this alias's direction.
   1.183 +// Only valid for RULES aliases; does nothing if ec already holds a failure.
   1.184 +void TransliteratorAlias::parse(TransliteratorParser& parser,
   1.185 +                                UParseError& pe, UErrorCode& ec) const {
   1.186 +    U_ASSERT(type == RULES);
   1.187 +    if (U_SUCCESS(ec)) {
   1.188 +        parser.parse(aliasesOrRules, direction, pe, ec);
   1.189 +    }
   1.190 +}
   1.191 +
   1.192 +//----------------------------------------------------------------------
   1.193 +// class TransliteratorSpec
   1.194 +//----------------------------------------------------------------------
   1.195 +
   1.196 +/**
   1.197 + * A TransliteratorSpec is a string specifying either a source or a target.  In more
   1.198 + * general terms, it may also specify a variant, but we only use the
   1.199 + * Spec class for sources and targets.
   1.200 + *
   1.201 + * A Spec may be a locale or a script.  If it is a locale, it has a
   1.202 + * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
   1.203 + * ssss is the script mapping of xx_YY_ZZZ.  The Spec API methods
   1.204 + * hasFallback(), next(), and reset() iterate over this fallback
   1.205 + * sequence.
   1.206 + *
   1.207 + * The Spec class canonicalizes itself, so the locale is put into
   1.208 + * canonical form, or the script is transformed from an abbreviation
   1.209 + * to a full name.
   1.210 + */
   1.211 +class TransliteratorSpec : public UMemory {
   1.212 + public:
   1.213 +    TransliteratorSpec(const UnicodeString& spec); // canonicalizes the spec and positions the iteration at the top
   1.214 +    ~TransliteratorSpec();
   1.215 +
   1.216 +    const UnicodeString& get() const;   // spec at the current position of the fallback chain
   1.217 +    UBool hasFallback() const;          // TRUE while nextSpec is non-empty
   1.218 +    const UnicodeString& next();        // advance to the next fallback and return it
   1.219 +    void reset();                       // return to the canonicalized top spec
   1.220 +
   1.221 +    UBool isLocale() const;             // TRUE if the current spec is a locale
   1.222 +    ResourceBundle& getBundle() const;  // dereferences res; res is 0 unless a locale bundle was opened
   1.223 +
   1.224 +    operator const UnicodeString&() const { return get(); }
   1.225 +    const UnicodeString& getTop() const { return top; }
   1.226 +
   1.227 + private:
   1.228 +    void setupNext();                   // recomputes nextSpec / isNextLocale from spec
   1.229 +
   1.230 +    UnicodeString top;        // canonicalized form of the constructor argument
   1.231 +    UnicodeString spec;       // current position in the fallback chain
   1.232 +    UnicodeString nextSpec;   // following position; empty at the end of the chain
   1.233 +    UnicodeString scriptName; // script name from uscript_getName(); may be empty
   1.234 +    UBool isSpecLocale; // TRUE if spec is a locale
   1.235 +    UBool isNextLocale; // TRUE if nextSpec is a locale
   1.236 +    ResourceBundle* res;      // owned; deleted by the destructor; 0 when no bundle is held
   1.237 +
   1.238 +    TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
   1.239 +    TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
   1.240 +};
   1.241 +
   1.242 +TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
   1.243 +: top(theSpec),
   1.244 +  isSpecLocale(FALSE), // both flags get defined values even when reset() below is a
   1.245 +  isNextLocale(FALSE), // no-op (empty canonical spec) or the constructor returns early
   1.246 +  res(0)
   1.247 +{
   1.248 +    UErrorCode status = U_ZERO_ERROR;
   1.249 +    Locale topLoc("");
   1.250 +    LocaleUtility::initLocaleFromName(theSpec, topLoc);
   1.251 +    if (!topLoc.isBogus()) {
   1.252 +        res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status);
   1.253 +        if (res == 0) {
   1.254 +            return; // allocation failed: leave an empty, non-locale spec with no fallback
   1.255 +        }
   1.256 +        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
   1.257 +            delete res; // bundle failed to open or only the default was used: not treated as a locale
   1.258 +            res = 0;
   1.259 +        }
   1.260 +    }
   1.261 +
   1.262 +    // Canonicalize script name -or- do locale->script mapping
   1.263 +    status = U_ZERO_ERROR;
   1.264 +    static const int32_t capacity = 10;
   1.265 +    UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
   1.266 +    int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(),
   1.267 +                                  script, capacity, &status);
   1.268 +    if (num > 0 && script[0] != USCRIPT_INVALID_CODE) {
   1.269 +        scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV);
   1.270 +    }
   1.271 +
   1.272 +    // Canonicalize top
   1.273 +    if (res != 0) {
   1.274 +        // Canonicalize locale name
   1.275 +        UnicodeString locStr;
   1.276 +        LocaleUtility::initNameFromLocale(topLoc, locStr);
   1.277 +        if (!locStr.isBogus()) {
   1.278 +            top = locStr;
   1.279 +        }
   1.280 +    } else if (scriptName.length() != 0) {
   1.281 +        // We are a script; use canonical name
   1.282 +        top = scriptName;
   1.283 +    }
   1.284 +    // Position the fallback iteration at the canonicalized top spec.
   1.285 +    reset();
   1.286 +}
   1.287 +
   1.288 +TransliteratorSpec::~TransliteratorSpec() {
   1.289 +    delete res; // res is allocated in the constructor and may be 0; deleting 0 is a no-op
   1.290 +}
   1.291 +
   1.292 +UBool TransliteratorSpec::hasFallback() const {
   1.293 +    return !nextSpec.isEmpty();  // an empty nextSpec marks the end of the fallback chain
   1.294 +}
   1.295 +
   1.296 +void TransliteratorSpec::reset() {
   1.297 +    if (spec == top) { return; }  // already at the top of the chain: nothing to recompute
   1.298 +    // Rewind to the top spec; it counts as a locale only if a bundle is held.
   1.299 +    spec = top;
   1.300 +    isSpecLocale = (0 != res);
   1.301 +    setupNext();
   1.302 +}
   1.303 +
   1.304 +// Computes nextSpec and isNextLocale: the fallback that follows the current spec.
   1.305 +void TransliteratorSpec::setupNext() {
   1.306 +    isNextLocale = FALSE;
   1.307 +    if (!isSpecLocale) {
   1.308 +        // spec is a script, so we are at the end: an empty nextSpec stops the chain.
   1.309 +        nextSpec.truncate(0);
   1.310 +        return;
   1.311 +    }
   1.312 +    nextSpec = spec;
   1.313 +    const int32_t sepIndex = nextSpec.lastIndexOf(LOCALE_SEP);
   1.314 +    if (sepIndex <= 0) {
   1.315 +        // No separator, or a leading one ("_FOO"): fall back to scriptName, which may be empty.
   1.316 +        nextSpec = scriptName;
   1.317 +        return;
   1.318 +    }
   1.319 +    nextSpec.truncate(sepIndex);  // drop the last subtag, e.g. xx_YY_ZZZ -> xx_YY
   1.320 +    isNextLocale = TRUE;
   1.321 +}
   1.322 +
   1.323 +// Protocol:
   1.324 +// for(const UnicodeString& s(spec.get());
   1.325 +//     spec.hasFallback(); s(spec.next())) { ...
   1.326 +
   1.327 +const UnicodeString& TransliteratorSpec::next() {
   1.328 +    isSpecLocale = isNextLocale;  // promote the precomputed fallback to current ...
   1.329 +    spec = nextSpec;
   1.330 +    setupNext();                  // ... then compute the fallback that follows it
   1.331 +    return spec;
   1.332 +}
   1.333 +
   1.334 +const UnicodeString& TransliteratorSpec::get() const {
   1.335 +    return spec; // current position in the fallback chain; the reference stays bound to the member
   1.336 +}
   1.337 +
   1.338 +UBool TransliteratorSpec::isLocale() const {
   1.339 +    return isSpecLocale; // maintained by reset() and next()
   1.340 +}
   1.341 +
   1.342 +ResourceBundle& TransliteratorSpec::getBundle() const {
   1.343 +    return *res; // no null check: res is 0 unless the constructor kept a locale bundle
   1.344 +}
   1.345 +
   1.346 +//----------------------------------------------------------------------
   1.347 +
   1.348 +#ifdef DEBUG_MEM
   1.349 +
   1.350 +// Vector of Entry pointers currently in use (created lazily by DEBUG_setup)
   1.351 +static UVector* DEBUG_entries = NULL;
   1.352 +
   1.353 +static void DEBUG_setup() {
   1.354 +    if (DEBUG_entries == NULL) {
   1.355 +        UErrorCode ec = U_ZERO_ERROR;
   1.356 +        DEBUG_entries = new UVector(ec);
   1.357 +    }
   1.358 +}
   1.359 +
   1.360 +// Caller must call DEBUG_setup first.  Return index of given Entry,
   1.361 +// if it is in use (not deleted yet), or -1 if not found.
   1.362 +static int DEBUG_findEntry(TransliteratorEntry* e) {
   1.363 +    for (int i=0; i<DEBUG_entries->size(); ++i) {
   1.364 +        if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
   1.365 +            return i;
   1.366 +        }
   1.367 +    }
   1.368 +    return -1;
   1.369 +}
   1.370 +
   1.371 +// Track object creation: record e, complaining if it is already recorded
   1.372 +static void DEBUG_newEntry(TransliteratorEntry* e) {
   1.373 +    DEBUG_setup();
   1.374 +    if (DEBUG_findEntry(e) >= 0) {
   1.375 +        // This should really never happen unless the heap is broken
   1.376 +        printf("ERROR DEBUG_newEntry duplicate new pointer %p\n", (void*) e);
   1.377 +        return;
   1.378 +    }
   1.379 +    UErrorCode ec = U_ZERO_ERROR;
   1.380 +    DEBUG_entries->addElement(e, ec);
   1.381 +}
   1.382 +
   1.383 +// Track object deletion: forget e, complaining if it was not recorded
   1.384 +static void DEBUG_delEntry(TransliteratorEntry* e) {
   1.385 +    DEBUG_setup();
   1.386 +    int i = DEBUG_findEntry(e);
   1.387 +    if (i < 0) {
   1.388 +        printf("ERROR DEBUG_delEntry possible double deletion %p\n", (void*) e);
   1.389 +        return;
   1.390 +    }
   1.391 +    DEBUG_entries->removeElementAt(i);
   1.392 +}
   1.393 +
   1.394 +// Track object usage: complain if a non-null e is not recorded as live
   1.395 +static void DEBUG_useEntry(TransliteratorEntry* e) {
   1.396 +    if (e == NULL) return;
   1.397 +    DEBUG_setup();
   1.398 +    int i = DEBUG_findEntry(e);
   1.399 +    if (i < 0) {
   1.400 +        printf("ERROR DEBUG_useEntry possible dangling pointer %p\n", (void*) e);
   1.401 +    }
   1.402 +}
   1.403 +
   1.404 +#else
   1.405 +// If we're not debugging then make these macros into NOPs
   1.406 +#define DEBUG_newEntry(x)
   1.407 +#define DEBUG_delEntry(x)
   1.408 +#define DEBUG_useEntry(x)
   1.409 +#endif
   1.410 +
   1.411 +//----------------------------------------------------------------------
   1.412 +// class Entry
   1.413 +//----------------------------------------------------------------------
   1.414 +
   1.415 +/**
   1.416 + * The Entry object stores objects of different types and
   1.417 + * singleton objects as placeholders for rule-based transliterators to
   1.418 + * be built as needed.  Instances of this struct can be placeholders,
   1.419 + * can represent prototype transliterators to be cloned, or can
   1.420 + * represent TransliteratorData objects.  We don't support storing
   1.421 + * classes in the registry because we don't have the rtti infrastructure
   1.422 + * for it.  We could easily add this if there is a need for it in the
   1.423 + * future.
   1.424 + */
   1.425 +class TransliteratorEntry : public UMemory {
   1.426 +public:
   1.427 +    enum Type {
   1.428 +        RULES_FORWARD, // set by put(ID, resourceName, dir, ...) when dir is UTRANS_FORWARD
   1.429 +        RULES_REVERSE, // set by the same put() overload for any other direction
   1.430 +        LOCALE_RULES,
   1.431 +        PROTOTYPE,     // set by adoptPrototype(); u.prototype is deleted by the destructor
   1.432 +        RBT_DATA,      // u.data is deleted by the destructor
   1.433 +        COMPOUND_RBT,  // u.dataVector and its elements are deleted by the destructor
   1.434 +        ALIAS,         // stringArg holds the ID this entry stands for
   1.435 +        FACTORY,       // set by setFactory()
   1.436 +        NONE // Only used for uninitialized entries
   1.437 +    } entryType;
   1.438 +    // NOTE: stringArg cannot go inside the union because
   1.439 +    // it has a copy constructor
   1.440 +    UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT
   1.441 +    int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES
   1.442 +    UnicodeSet* compoundFilter; // For COMPOUND_RBT; reget() also sets it for ALIAS; deleted by the destructor
   1.443 +    union {
   1.444 +        Transliterator* prototype; // For PROTOTYPE
   1.445 +        TransliterationRuleData* data; // For RBT_DATA
   1.446 +        UVector* dataVector;    // For COMPOUND_RBT
   1.447 +        struct {
   1.448 +            Transliterator::Factory function;
   1.449 +            Transliterator::Token   context;
   1.450 +        } factory; // For FACTORY
   1.451 +    } u; // which member is valid is determined by entryType
   1.452 +    TransliteratorEntry();
   1.453 +    ~TransliteratorEntry();
   1.454 +    void adoptPrototype(Transliterator* adopted); // becomes PROTOTYPE and owns `adopted`
   1.455 +    void setFactory(Transliterator::Factory factory,
   1.456 +                    Transliterator::Token context); // becomes FACTORY
   1.457 +
   1.458 +private:
   1.459 +
   1.460 +    TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
   1.461 +    TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
   1.462 +};
   1.463 +
   1.464 +// Creates an uninitialized (NONE) entry that owns nothing.  intArg is zeroed
   1.465 +// as well, so no member is left indeterminate before a put()/reget() fills it.
   1.466 +TransliteratorEntry::TransliteratorEntry() : entryType(NONE), intArg(0), compoundFilter(NULL) {
   1.467 +    u.prototype = 0;
   1.468 +    DEBUG_newEntry(this);
   1.469 +}
   1.470 +
   1.471 +// Releases whatever this entry owns; the owned union member depends on entryType.
   1.472 +TransliteratorEntry::~TransliteratorEntry() {
   1.473 +    DEBUG_delEntry(this);
   1.474 +    switch (entryType) {
   1.475 +    case PROTOTYPE:    delete u.prototype; break;
   1.476 +    case RBT_DATA:     delete u.data;      break;  // owned here but shared by RBT instances; this invalidates them
   1.477 +    case COMPOUND_RBT:
   1.478 +        while (u.dataVector != NULL && !u.dataVector->isEmpty()) {
   1.479 +            delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
   1.480 +        }
   1.481 +        delete u.dataVector;
   1.482 +        break;
   1.483 +    default:           // the remaining entry types own nothing inside the union
   1.484 +        break;
   1.485 +    }
   1.486 +    delete compoundFilter;
   1.487 +}
   1.488 +
   1.489 +// Makes this a PROTOTYPE entry that owns `adopted`.  A prototype already held
   1.490 +// is deleted first; data owned under any other entry type is not released here.
   1.491 +void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
   1.492 +    if (PROTOTYPE == entryType) { delete u.prototype; }
   1.493 +    u.prototype = adopted;
   1.494 +    entryType = PROTOTYPE;
   1.495 +}
   1.496 +
   1.497 +// Makes this a FACTORY entry holding the callback and its context token.  A
   1.498 +// prototype held so far is deleted; other owned data is not released here.
   1.499 +void TransliteratorEntry::setFactory(Transliterator::Factory factory,
   1.500 +                                     Transliterator::Token context) {
   1.501 +    if (PROTOTYPE == entryType) { delete u.prototype; }
   1.502 +    u.factory.context = context;
   1.503 +    u.factory.function = factory;
   1.504 +    entryType = FACTORY;
   1.505 +}
   1.506 +
   1.507 +// Value deleter for the registry hashtable: destroys one TransliteratorEntry
   1.508 +U_CDECL_BEGIN
   1.509 +static void U_CALLCONV
   1.510 +deleteEntry(void* obj) {
   1.511 +    delete static_cast<TransliteratorEntry*>(obj);
   1.512 +}
   1.513 +U_CDECL_END
   1.514 +
   1.515 +//----------------------------------------------------------------------
   1.516 +// class TransliteratorRegistry: Basic public API
   1.517 +//----------------------------------------------------------------------
   1.518 +
   1.519 +TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status)
   1.520 +    : registry(TRUE, status),
   1.521 +      specDAG(TRUE, status),
   1.522 +      availableIDs(status) {
   1.523 +    // The containers own their contents, so install the matching deleters.
   1.524 +    registry.setValueDeleter(deleteEntry);            // values are TransliteratorEntry*
   1.525 +    specDAG.setValueDeleter(uhash_deleteHashtable);   // values are Hashtable*
   1.526 +    availableIDs.setDeleter(uprv_deleteUObject);
   1.527 +    availableIDs.setComparer(uhash_compareCaselessUnicodeString);
   1.528 +}
   1.529 +
   1.530 +TransliteratorRegistry::~TransliteratorRegistry() {
   1.531 +    // Nothing to do here: the member containers are destroyed automatically and use the deleters set in the constructor
   1.532 +}
   1.533 +
   1.534 +Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
   1.535 +                                            TransliteratorAlias*& aliasReturn,
   1.536 +                                            UErrorCode& status) {
   1.537 +    U_ASSERT(aliasReturn == NULL);
   1.538 +    TransliteratorEntry* const entry = find(ID);
   1.539 +    if (entry == 0) { return 0; }  // find() produced no entry for this ID
   1.540 +    return instantiateEntry(ID, entry, aliasReturn, status);
   1.541 +}
   1.542 +
   1.543 +Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
   1.544 +                                              TransliteratorParser& parser,
   1.545 +                                              TransliteratorAlias*& aliasReturn,
   1.546 +                                              UErrorCode& status) {
   1.547 +    U_ASSERT(aliasReturn == NULL);
   1.548 +    TransliteratorEntry *entry = find(ID);
   1.549 +
   1.550 +    if (entry == 0) {
   1.551 +        // We get to this point if there are two threads, one of which
   1.552 +        // is instantiating an ID, and another of which is removing
   1.553 +        // the same ID from the registry, and the timing is just right.
   1.554 +        return 0;
   1.555 +    }
   1.556 +
   1.557 +    // The usage model for the caller is that they will first call
   1.558 +    // reg->get() inside the mutex, they'll get back an alias, they call
   1.559 +    // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
   1.560 +    // outside the mutex, then reg->reget() inside the mutex again.  This
   1.561 +    // function stores the parse result in the entry and instantiates it.
   1.562 +    // Note: It's possible that in between the caller calling
   1.563 +    // alias->parse() and reg->reget(), that another thread will have
   1.564 +    // called reg->reget(), and the entry will already have been fixed up.
   1.565 +    // The type test below detects this so we don't stomp over existing entry
   1.566 +    // data members and potentially leak memory (u.data and compoundFilter).
   1.567 +
   1.568 +    if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
   1.569 +        entry->entryType == TransliteratorEntry::RULES_REVERSE ||
   1.570 +        entry->entryType == TransliteratorEntry::LOCALE_RULES) {
   1.571 +        if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
   1.572 +            entry->u.data = 0;
   1.573 +            entry->entryType = TransliteratorEntry::ALIAS;
   1.574 +            entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
   1.575 +        }
   1.576 +        else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
   1.577 +            entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
   1.578 +            entry->entryType = TransliteratorEntry::RBT_DATA;
   1.579 +        }
   1.580 +        else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
   1.581 +            entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
   1.582 +            entry->compoundFilter = parser.orphanCompoundFilter();
   1.583 +            entry->entryType = TransliteratorEntry::ALIAS;
   1.584 +        }
   1.585 +        else {
   1.586 +            UVector* ruleData = new UVector(status);
   1.587 +            if (ruleData == 0) {
   1.588 +                // Fail before touching the entry, so it keeps its RULES type and is never left as COMPOUND_RBT without a vector.
   1.589 +                status = U_MEMORY_ALLOCATION_ERROR;
   1.590 +                return 0;
   1.591 +            }
   1.592 +            entry->entryType = TransliteratorEntry::COMPOUND_RBT;
   1.593 +            entry->compoundFilter = parser.orphanCompoundFilter();
   1.594 +            entry->u.dataVector = ruleData;
   1.595 +            entry->stringArg.remove();
   1.596 +            int32_t limit = parser.idBlockVector.size();
   1.597 +            if (parser.dataVector.size() > limit)
   1.598 +                limit = parser.dataVector.size();
   1.599 +            for (int32_t i = 0; i < limit; i++) {
   1.600 +                if (i < parser.idBlockVector.size()) {
   1.601 +                    UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
   1.602 +                    if (!idBlock->isEmpty())
   1.603 +                        entry->stringArg += *idBlock;
   1.604 +                }
   1.605 +                if (!parser.dataVector.isEmpty()) {
   1.606 +                    TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
   1.607 +                    entry->u.dataVector->addElement(data, status);
   1.608 +                    entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
   1.609 +                }
   1.610 +            }
   1.611 +        }
   1.612 +    }
   1.613 +
   1.614 +    return instantiateEntry(ID, entry, aliasReturn, status);
   1.615 +}
   1.616 +
   1.617 +// Registers an adopted prototype under its own ID.  On allocation failure ec is
   1.618 +// set and the prototype is deleted, since the caller has already given it up.
   1.619 +void TransliteratorRegistry::put(Transliterator* adoptedProto, UBool visible, UErrorCode& ec) {
   1.620 +    TransliteratorEntry *entry = new TransliteratorEntry();
   1.621 +    if (entry == NULL) {
   1.622 +        delete adoptedProto; // adopted object must not leak when it cannot be registered
   1.623 +        ec = U_MEMORY_ALLOCATION_ERROR;
   1.624 +        return;
   1.625 +    }
   1.626 +    entry->adoptPrototype(adoptedProto);
   1.627 +    registerEntry(adoptedProto->getID(), entry, visible);
   1.628 +}
   1.629 +
   1.630 +// Registers a FACTORY entry for ID; sets ec if the entry cannot be allocated.
   1.631 +void TransliteratorRegistry::put(const UnicodeString& ID,
   1.632 +                                 Transliterator::Factory factory,
   1.633 +                                 Transliterator::Token context,
   1.634 +                                 UBool visible, UErrorCode& ec) {
   1.635 +    TransliteratorEntry* const entry = new TransliteratorEntry();
   1.636 +    if (entry != NULL) {
   1.637 +        entry->setFactory(factory, context);
   1.638 +        registerEntry(ID, entry, visible);
   1.639 +    } else {
   1.640 +        ec = U_MEMORY_ALLOCATION_ERROR;
   1.641 +    }
   1.642 +}
   1.643 +
   1.644 +// Registers a RULES_FORWARD or RULES_REVERSE entry (chosen by dir) for ID and
   1.645 +// stores resourceName in it; sets ec if the entry cannot be allocated.
   1.646 +void TransliteratorRegistry::put(const UnicodeString& ID,
   1.647 +                                 const UnicodeString& resourceName,
   1.648 +                                 UTransDirection dir,
   1.649 +                                 UBool readonlyResourceAlias,
   1.650 +                                 UBool visible, UErrorCode& ec) {
   1.651 +    TransliteratorEntry* const entry = new TransliteratorEntry();
   1.652 +    if (entry == NULL) {
   1.653 +        ec = U_MEMORY_ALLOCATION_ERROR;
   1.654 +        return;
   1.655 +    }
   1.656 +    entry->entryType = (dir != UTRANS_FORWARD) ? TransliteratorEntry::RULES_REVERSE
   1.657 +        : TransliteratorEntry::RULES_FORWARD;
   1.658 +    if (!readonlyResourceAlias) {
   1.659 +        entry->stringArg = resourceName;  // keep a copy of the name
   1.660 +    } else {
   1.661 +        entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1);  // alias the caller's buffer instead of copying
   1.662 +    }
   1.663 +    registerEntry(ID, entry, visible);
   1.664 +}
   1.665 +
   1.666 +// Registers ID as an ALIAS entry for `alias`.  Allocation failure is reported
   1.667 +// through ec, as in the other put() overloads, instead of being dropped silently.
   1.668 +void TransliteratorRegistry::put(const UnicodeString& ID,
   1.669 +                                 const UnicodeString& alias,
   1.670 +                                 UBool readonlyAliasAlias, UBool visible, UErrorCode& ec) {
   1.671 +    TransliteratorEntry *entry = new TransliteratorEntry();
   1.672 +    if (entry == NULL) {
   1.673 +        ec = U_MEMORY_ALLOCATION_ERROR;
   1.674 +        return;
   1.675 +    }
   1.676 +    entry->entryType = TransliteratorEntry::ALIAS;
   1.677 +    if (readonlyAliasAlias) {
   1.678 +        entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);  // alias the caller's buffer instead of copying
   1.679 +    } else {
   1.680 +        entry->stringArg = alias;
   1.681 +    }
   1.682 +    registerEntry(ID, entry, visible);
   1.683 +}
   1.684 +
   1.685 +void TransliteratorRegistry::remove(const UnicodeString& ID) {
   1.686 +    // Split the ID into source/target/variant and rebuild it from those parts;
   1.687 +    // the rebuild is only needed if ID.indexOf('-') < 0.
   1.688 +    UBool sawSource;
   1.689 +    UnicodeString source, target, variant, rebuiltID;
   1.690 +    TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
   1.691 +    TransliteratorIDParser::STVtoID(source, target, variant, rebuiltID);
   1.692 +    registry.remove(rebuiltID);
   1.693 +    removeSTV(source, target, variant);
   1.694 +    availableIDs.removeElement((void*) &rebuiltID);
   1.695 +}
   1.696 +
   1.697 +//----------------------------------------------------------------------
   1.698 +// class TransliteratorRegistry: Public ID and spec management
   1.699 +//----------------------------------------------------------------------
   1.700 +
   1.701 +/**
   1.702 + * == OBSOLETE - remove in ICU 3.4 ==
   1.703 + * Return the number of IDs currently registered with the system.
   1.704 + * To retrieve the actual IDs, call getAvailableID(i) with
   1.705 + * i from 0 to countAvailableIDs() - 1.
   1.706 + */
   1.707 +int32_t TransliteratorRegistry::countAvailableIDs(void) const {
   1.708 +    return availableIDs.size(); // number of elements currently in the availableIDs vector
   1.709 +}
   1.710 +
   1.711 +/**
   1.712 + * == OBSOLETE - remove in ICU 3.4 ==
   1.713 + * Return the index-th available ID.  index must be between 0
   1.714 + * and countAvailableIDs() - 1, inclusive.  If index is out of
   1.715 + * range, the result of getAvailableID(0) is returned.
   1.716 + */
   1.717 +const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
   1.718 +    if (index < 0 || index >= availableIDs.size()) {
   1.719 +        index = 0;
   1.720 +    }
   1.721 +    return *(const UnicodeString*) availableIDs[index];
   1.722 +}
   1.723 +
   1.724 +StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
   1.725 +    return new Enumeration(*this);
   1.726 +}
   1.727 +
   1.728 +int32_t TransliteratorRegistry::countAvailableSources(void) const {
   1.729 +    return specDAG.count();
   1.730 +}
   1.731 +
   1.732 +UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
   1.733 +                                                          UnicodeString& result) const {
   1.734 +    int32_t pos = -1;
   1.735 +    const UHashElement *e = 0;
   1.736 +    while (index-- >= 0) {
   1.737 +        e = specDAG.nextElement(pos);
   1.738 +        if (e == 0) {
   1.739 +            break;
   1.740 +        }
   1.741 +    }
   1.742 +    if (e == 0) {
   1.743 +        result.truncate(0);
   1.744 +    } else {
   1.745 +        result = *(UnicodeString*) e->key.pointer;
   1.746 +    }
   1.747 +    return result;
   1.748 +}
   1.749 +
   1.750 +int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const {
   1.751 +    Hashtable *targets = (Hashtable*) specDAG.get(source);
   1.752 +    return (targets == 0) ? 0 : targets->count();
   1.753 +}
   1.754 +
   1.755 +UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index,
   1.756 +                                                          const UnicodeString& source,
   1.757 +                                                          UnicodeString& result) const {
   1.758 +    Hashtable *targets = (Hashtable*) specDAG.get(source);
   1.759 +    if (targets == 0) {
   1.760 +        result.truncate(0); // invalid source
   1.761 +        return result;
   1.762 +    }
   1.763 +    int32_t pos = -1;
   1.764 +    const UHashElement *e = 0;
   1.765 +    while (index-- >= 0) {
   1.766 +        e = targets->nextElement(pos);
   1.767 +        if (e == 0) {
   1.768 +            break;
   1.769 +        }
   1.770 +    }
   1.771 +    if (e == 0) {
   1.772 +        result.truncate(0); // invalid index
   1.773 +    } else {
   1.774 +        result = *(UnicodeString*) e->key.pointer;
   1.775 +    }
   1.776 +    return result;
   1.777 +}
   1.778 +
   1.779 +int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source,
   1.780 +                                                       const UnicodeString& target) const {
   1.781 +    Hashtable *targets = (Hashtable*) specDAG.get(source);
   1.782 +    if (targets == 0) {
   1.783 +        return 0;
   1.784 +    }
   1.785 +    UVector *variants = (UVector*) targets->get(target);
   1.786 +    // variants may be 0 if the source/target are invalid
   1.787 +    return (variants == 0) ? 0 : variants->size();
   1.788 +}
   1.789 +
   1.790 +UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
   1.791 +                                                           const UnicodeString& source,
   1.792 +                                                           const UnicodeString& target,
   1.793 +                                                           UnicodeString& result) const {
   1.794 +    Hashtable *targets = (Hashtable*) specDAG.get(source);
   1.795 +    if (targets == 0) {
   1.796 +        result.truncate(0); // invalid source
   1.797 +        return result;
   1.798 +    }
   1.799 +    UVector *variants = (UVector*) targets->get(target);
   1.800 +    if (variants == 0) {
   1.801 +        result.truncate(0); // invalid target
   1.802 +        return result;
   1.803 +    }
   1.804 +    UnicodeString *v = (UnicodeString*) variants->elementAt(index);
   1.805 +    if (v == 0) {
   1.806 +        result.truncate(0); // invalid index
   1.807 +    } else {
   1.808 +        result = *v;
   1.809 +    }
   1.810 +    return result;
   1.811 +}
   1.812 +
   1.813 +//----------------------------------------------------------------------
   1.814 +// class TransliteratorRegistry::Enumeration
   1.815 +//----------------------------------------------------------------------
   1.816 +
   1.817 +TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) :
   1.818 +    index(0), reg(_reg) {
   1.819 +}
   1.820 +
   1.821 +TransliteratorRegistry::Enumeration::~Enumeration() {
   1.822 +}
   1.823 +
   1.824 +int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const {
   1.825 +    return reg.availableIDs.size();
   1.826 +}
   1.827 +
   1.828 +const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
   1.829 +    // This is sloppy but safe -- if we get out of sync with the underlying
   1.830 +    // registry, we will still return legal strings, but they might not
   1.831 +    // correspond to the snapshot at construction time.  So there could be
   1.832 +    // duplicate IDs or omitted IDs if insertions or deletions occur in one
   1.833 +    // thread while another is iterating.  To be more rigorous, add a timestamp,
   1.834 +    // which is incremented with any modification, and validate this iterator
   1.835 +    // against the timestamp at construction time.  This probably isn't worth
   1.836 +    // doing as long as there is some possibility of removing this code in favor
   1.837 +    // of some new code based on Doug's service framework.
   1.838 +    if (U_FAILURE(status)) {
   1.839 +        return NULL;
   1.840 +    }
   1.841 +    int32_t n = reg.availableIDs.size();
   1.842 +    if (index > n) {
   1.843 +        status = U_ENUM_OUT_OF_SYNC_ERROR;
   1.844 +    }
   1.845 +    // index == n is okay -- this means we've reached the end
   1.846 +    if (index < n) {
   1.847 +        // Copy the string! This avoids lifetime problems.
   1.848 +        unistr = *(const UnicodeString*)reg.availableIDs[index++];
   1.849 +        return &unistr;
   1.850 +    } else {
   1.851 +        return NULL;
   1.852 +    }
   1.853 +}
   1.854 +
   1.855 +void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) {
   1.856 +    index = 0;
   1.857 +}
   1.858 +
   1.859 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
   1.860 +
   1.861 +//----------------------------------------------------------------------
   1.862 +// class TransliteratorRegistry: internal
   1.863 +//----------------------------------------------------------------------
   1.864 +
   1.865 +/**
   1.866 + * Convenience method.  Calls 6-arg registerEntry().
   1.867 + */
   1.868 +void TransliteratorRegistry::registerEntry(const UnicodeString& source,
   1.869 +                                           const UnicodeString& target,
   1.870 +                                           const UnicodeString& variant,
   1.871 +                                           TransliteratorEntry* adopted,
   1.872 +                                           UBool visible) {
   1.873 +    UnicodeString ID;
   1.874 +    UnicodeString s(source);
   1.875 +    if (s.length() == 0) {
   1.876 +        s.setTo(TRUE, ANY, 3);
   1.877 +    }
   1.878 +    TransliteratorIDParser::STVtoID(source, target, variant, ID);
   1.879 +    registerEntry(ID, s, target, variant, adopted, visible);
   1.880 +}
   1.881 +
   1.882 +/**
   1.883 + * Convenience method.  Calls 6-arg registerEntry().
   1.884 + */
   1.885 +void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
   1.886 +                                           TransliteratorEntry* adopted,
   1.887 +                                           UBool visible) {
   1.888 +    UnicodeString source, target, variant;
   1.889 +    UBool sawSource;
   1.890 +    TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
   1.891 +    // Only need to do this if ID.indexOf('-') < 0
   1.892 +    UnicodeString id;
   1.893 +    TransliteratorIDParser::STVtoID(source, target, variant, id);
   1.894 +    registerEntry(id, source, target, variant, adopted, visible);
   1.895 +}
   1.896 +
   1.897 +/**
   1.898 + * Register an entry object (adopted) with the given ID, source,
   1.899 + * target, and variant strings.
   1.900 + */
   1.901 +void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
   1.902 +                                           const UnicodeString& source,
   1.903 +                                           const UnicodeString& target,
   1.904 +                                           const UnicodeString& variant,
   1.905 +                                           TransliteratorEntry* adopted,
   1.906 +                                           UBool visible) {
   1.907 +    UErrorCode status = U_ZERO_ERROR;
   1.908 +    registry.put(ID, adopted, status);
   1.909 +    if (visible) {
   1.910 +        registerSTV(source, target, variant);
   1.911 +        if (!availableIDs.contains((void*) &ID)) {
   1.912 +            UnicodeString *newID = (UnicodeString *)ID.clone();
   1.913 +            // Check to make sure newID was created.
   1.914 +            if (newID != NULL) {
   1.915 +	            // NUL-terminate the ID string
   1.916 +	            newID->getTerminatedBuffer();
   1.917 +	            availableIDs.addElement(newID, status);
   1.918 +            }
   1.919 +        }
   1.920 +    } else {
   1.921 +        removeSTV(source, target, variant);
   1.922 +        availableIDs.removeElement((void*) &ID);
   1.923 +    }
   1.924 +}
   1.925 +
   1.926 +/**
   1.927 + * Register a source-target/variant in the specDAG.  Variant may be
   1.928 + * empty, but source and target must not be.  If variant is empty then
   1.929 + * the special variant NO_VARIANT is stored in slot zero of the
   1.930 + * UVector of variants.
   1.931 + */
   1.932 +void TransliteratorRegistry::registerSTV(const UnicodeString& source,
   1.933 +                                         const UnicodeString& target,
   1.934 +                                         const UnicodeString& variant) {
   1.935 +    // assert(source.length() > 0);
   1.936 +    // assert(target.length() > 0);
   1.937 +    UErrorCode status = U_ZERO_ERROR;
   1.938 +    Hashtable *targets = (Hashtable*) specDAG.get(source);
   1.939 +    if (targets == 0) {
   1.940 +        targets = new Hashtable(TRUE, status);
   1.941 +        if (U_FAILURE(status) || targets == 0) {
   1.942 +            return;
   1.943 +        }
   1.944 +        targets->setValueDeleter(uprv_deleteUObject);
   1.945 +        specDAG.put(source, targets, status);
   1.946 +    }
   1.947 +    UVector *variants = (UVector*) targets->get(target);
   1.948 +    if (variants == 0) {
   1.949 +        variants = new UVector(uprv_deleteUObject,
   1.950 +                               uhash_compareCaselessUnicodeString, status);
   1.951 +        if (variants == 0) {
   1.952 +            return;
   1.953 +        }
   1.954 +        targets->put(target, variants, status);
   1.955 +    }
   1.956 +    // assert(NO_VARIANT == "");
   1.957 +    // We add the variant string.  If it is the special "no variant"
   1.958 +    // string, that is, the empty string, we add it at position zero.
   1.959 +    if (!variants->contains((void*) &variant)) {
   1.960 +    	UnicodeString *tempus; // Used for null pointer check.
   1.961 +        if (variant.length() > 0) {
   1.962 +        	tempus = new UnicodeString(variant);
   1.963 +        	if (tempus != NULL) {
   1.964 +        		variants->addElement(tempus, status);
   1.965 +        	}
   1.966 +        } else {
   1.967 +        	tempus = new UnicodeString();  // = NO_VARIANT
   1.968 +        	if (tempus != NULL) {
   1.969 +        		variants->insertElementAt(tempus, 0, status);
   1.970 +        	}
   1.971 +        }
   1.972 +    }
   1.973 +}
   1.974 +
   1.975 +/**
   1.976 + * Remove a source-target/variant from the specDAG.
   1.977 + */
   1.978 +void TransliteratorRegistry::removeSTV(const UnicodeString& source,
   1.979 +                                       const UnicodeString& target,
   1.980 +                                       const UnicodeString& variant) {
   1.981 +    // assert(source.length() > 0);
   1.982 +    // assert(target.length() > 0);
   1.983 +//    UErrorCode status = U_ZERO_ERROR;
   1.984 +    Hashtable *targets = (Hashtable*) specDAG.get(source);
   1.985 +    if (targets == 0) {
   1.986 +        return; // should never happen for valid s-t/v
   1.987 +    }
   1.988 +    UVector *variants = (UVector*) targets->get(target);
   1.989 +    if (variants == 0) {
   1.990 +        return; // should never happen for valid s-t/v
   1.991 +    }
   1.992 +    variants->removeElement((void*) &variant);
   1.993 +    if (variants->size() == 0) {
   1.994 +        targets->remove(target); // should delete variants
   1.995 +        if (targets->count() == 0) {
   1.996 +            specDAG.remove(source); // should delete targets
   1.997 +        }
   1.998 +    }
   1.999 +}
  1.1000 +
  1.1001 +/**
  1.1002 + * Attempt to find a source-target/variant in the dynamic registry
  1.1003 + * store.  Return 0 on failure.
  1.1004 + *
  1.1005 + * Caller does NOT own returned object.
  1.1006 + */
  1.1007 +TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
  1.1008 +                                                  const TransliteratorSpec& trg,
  1.1009 +                                                  const UnicodeString& variant) const {
  1.1010 +    UnicodeString ID;
  1.1011 +    TransliteratorIDParser::STVtoID(src, trg, variant, ID);
  1.1012 +    TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID);
  1.1013 +    DEBUG_useEntry(e);
  1.1014 +    return e;
  1.1015 +}
  1.1016 +
  1.1017 +/**
  1.1018 + * Attempt to find a source-target/variant in the static locale
  1.1019 + * resource store.  Do not perform fallback.  Return 0 on failure.
  1.1020 + *
  1.1021 + * On success, create a new entry object, register it in the dynamic
  1.1022 + * store, and return a pointer to it, but do not make it public --
  1.1023 + * just because someone requested something, we do not expand the
  1.1024 + * available ID list (or spec DAG).
  1.1025 + *
  1.1026 + * Caller does NOT own returned object.
  1.1027 + */
  1.1028 +TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
  1.1029 +                                                 const TransliteratorSpec& trg,
  1.1030 +                                                 const UnicodeString& variant) {
  1.1031 +    TransliteratorEntry* entry = 0;
  1.1032 +    if (src.isLocale()) {
  1.1033 +        entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
  1.1034 +    } else if (trg.isLocale()) {
  1.1035 +        entry = findInBundle(trg, src, variant, UTRANS_REVERSE);
  1.1036 +    }
  1.1037 +
  1.1038 +    // If we found an entry, store it in the Hashtable for next
  1.1039 +    // time.
  1.1040 +    if (entry != 0) {
  1.1041 +        registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE);
  1.1042 +    }
  1.1043 +
  1.1044 +    return entry;
  1.1045 +}
  1.1046 +
  1.1047 +// As of 2.0, resource bundle keys cannot contain '_'
  1.1048 +static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo"
  1.1049 +
  1.1050 +static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom"
  1.1051 +
  1.1052 +static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate"
  1.1053 +
  1.1054 +/**
  1.1055 + * Attempt to find an entry in a single resource bundle.  This is
  1.1056 + * a one-sided lookup.  findInStaticStore() performs up to two such
  1.1057 + * lookups, one for the source, and one for the target.
  1.1058 + *
  1.1059 + * Do not perform fallback.  Return 0 on failure.
  1.1060 + *
  1.1061 + * On success, create a new Entry object, populate it, and return it.
  1.1062 + * The caller owns the returned object.
  1.1063 + */
  1.1064 +TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
  1.1065 +                                            const TransliteratorSpec& specToFind,
  1.1066 +                                            const UnicodeString& variant,
  1.1067 +                                            UTransDirection direction)
  1.1068 +{
  1.1069 +    UnicodeString utag;
  1.1070 +    UnicodeString resStr;
  1.1071 +    int32_t pass;
  1.1072 +
  1.1073 +    for (pass=0; pass<2; ++pass) {
  1.1074 +        utag.truncate(0);
  1.1075 +        // First try either TransliteratorTo_xxx or
  1.1076 +        // TransliterateFrom_xxx, then try the bidirectional
  1.1077 +        // Transliterate_xxx.  This precedence order is arbitrary
  1.1078 +        // but must be consistent and documented.
  1.1079 +        if (pass == 0) {
  1.1080 +            utag.append(direction == UTRANS_FORWARD ?
  1.1081 +                        TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
  1.1082 +        } else {
  1.1083 +            utag.append(TRANSLITERATE, -1);
  1.1084 +        }
  1.1085 +        UnicodeString s(specToFind.get());
  1.1086 +        utag.append(s.toUpper(""));
  1.1087 +        UErrorCode status = U_ZERO_ERROR;
  1.1088 +        ResourceBundle subres(specToOpen.getBundle().get(
  1.1089 +            CharString().appendInvariantChars(utag, status).data(), status));
  1.1090 +        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
  1.1091 +            continue;
  1.1092 +        }
  1.1093 +
  1.1094 +        s.truncate(0);
  1.1095 +        if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
  1.1096 +            continue;
  1.1097 +        }
  1.1098 +
  1.1099 +        if (variant.length() != 0) {
  1.1100 +            status = U_ZERO_ERROR;
  1.1101 +            resStr = subres.getStringEx(
  1.1102 +                CharString().appendInvariantChars(variant, status).data(), status);
  1.1103 +            if (U_SUCCESS(status)) {
  1.1104 +                // Exit loop successfully
  1.1105 +                break;
  1.1106 +            }
  1.1107 +        } else {
  1.1108 +            // Variant is empty, which means match the first variant listed.
  1.1109 +            status = U_ZERO_ERROR;
  1.1110 +            resStr = subres.getStringEx(1, status);
  1.1111 +            if (U_SUCCESS(status)) {
  1.1112 +                // Exit loop successfully
  1.1113 +                break;
  1.1114 +            }
  1.1115 +        }
  1.1116 +    }
  1.1117 +
  1.1118 +    if (pass==2) {
  1.1119 +        // Failed
  1.1120 +        return NULL;
  1.1121 +    }
  1.1122 +
  1.1123 +    // We have succeeded in loading a string from the locale
  1.1124 +    // resources.  Create a new registry entry to hold it and return it.
  1.1125 +    TransliteratorEntry *entry = new TransliteratorEntry();
  1.1126 +    if (entry != 0) {
  1.1127 +        // The direction is always forward for the
  1.1128 +        // TransliterateTo_xxx and TransliterateFrom_xxx
  1.1129 +        // items; those are unidirectional forward rules.
  1.1130 +        // For the bidirectional Transliterate_xxx items,
  1.1131 +        // the direction is the value passed in to this
  1.1132 +        // function.
  1.1133 +        int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
  1.1134 +        entry->entryType = TransliteratorEntry::LOCALE_RULES;
  1.1135 +        entry->stringArg = resStr;
  1.1136 +        entry->intArg = dir;
  1.1137 +    }
  1.1138 +
  1.1139 +    return entry;
  1.1140 +}
  1.1141 +
  1.1142 +/**
  1.1143 + * Convenience method.  Calls 3-arg find().
  1.1144 + */
  1.1145 +TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
  1.1146 +    UnicodeString source, target, variant;
  1.1147 +    UBool sawSource;
  1.1148 +    TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
  1.1149 +    return find(source, target, variant);
  1.1150 +}
  1.1151 +
  1.1152 +/**
  1.1153 + * Top-level find method.  Attempt to find a source-target/variant in
  1.1154 + * either the dynamic or the static (locale resource) store.  Perform
  1.1155 + * fallback.
  1.1156 + * 
  1.1157 + * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
  1.1158 + *
  1.1159 + *   ss_SS_SSS-tt_TT_TTT/v -- in hashtable
  1.1160 + *   ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
  1.1161 + * 
  1.1162 + *     repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
  1.1163 + *
  1.1164 + *     ss_SS_SSS-t/ *
  1.1165 + *     ss_SS-t/ *
  1.1166 + *     ss-t/ *
  1.1167 + *     sscript-t/ *
  1.1168 + *
  1.1169 + * Here * matches the first variant listed.
  1.1170 + *
  1.1171 + * Caller does NOT own returned object.  Return 0 on failure.
  1.1172 + */
  1.1173 +TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
  1.1174 +                                    UnicodeString& target,
  1.1175 +                                    UnicodeString& variant) {
  1.1176 +    
  1.1177 +    TransliteratorSpec src(source);
  1.1178 +    TransliteratorSpec trg(target);
  1.1179 +    TransliteratorEntry* entry;
  1.1180 +
  1.1181 +    // Seek exact match in hashtable.  Temporary fix for ICU 4.6.
  1.1182 +    // TODO: The general logic for finding a matching transliterator needs to be reviewed.
  1.1183 +    // ICU ticket #8089
  1.1184 +    UnicodeString ID;
  1.1185 +    TransliteratorIDParser::STVtoID(source, target, variant, ID);
  1.1186 +    entry = (TransliteratorEntry*) registry.get(ID);
  1.1187 +    if (entry != 0) {
  1.1188 +        // std::string ss;
  1.1189 +        // std::cout << ID.toUTF8String(ss) << std::endl;
  1.1190 +        return entry;
  1.1191 +    }
  1.1192 +
  1.1193 +    if (variant.length() != 0) {
  1.1194 +        
  1.1195 +        // Seek exact match in hashtable
  1.1196 +        entry = findInDynamicStore(src, trg, variant);
  1.1197 +        if (entry != 0) {
  1.1198 +            return entry;
  1.1199 +        }
  1.1200 +        
  1.1201 +        // Seek exact match in locale resources
  1.1202 +        entry = findInStaticStore(src, trg, variant);
  1.1203 +        if (entry != 0) {
  1.1204 +            return entry;
  1.1205 +        }
  1.1206 +    }
  1.1207 +
  1.1208 +    for (;;) {
  1.1209 +        src.reset();
  1.1210 +        for (;;) {
  1.1211 +            // Seek match in hashtable
  1.1212 +            entry = findInDynamicStore(src, trg, NO_VARIANT);
  1.1213 +            if (entry != 0) {
  1.1214 +                return entry;
  1.1215 +            }
  1.1216 +            
  1.1217 +            // Seek match in locale resources
  1.1218 +            entry = findInStaticStore(src, trg, NO_VARIANT);
  1.1219 +            if (entry != 0) {
  1.1220 +                return entry;
  1.1221 +            }
  1.1222 +            if (!src.hasFallback()) {
  1.1223 +                break;
  1.1224 +            }
  1.1225 +            src.next();
  1.1226 +        }
  1.1227 +        if (!trg.hasFallback()) {
  1.1228 +            break;
  1.1229 +        }
  1.1230 +        trg.next();
  1.1231 +    }
  1.1232 +
  1.1233 +    return 0;
  1.1234 +}
  1.1235 +
  1.1236 +/**
  1.1237 + * Given an Entry object, instantiate it.  Caller owns result.  Return
  1.1238 + * 0 on failure.
  1.1239 + *
  1.1240 + * Return a non-empty aliasReturn value if the ID points to an alias.
  1.1241 + * We cannot instantiate it ourselves because the alias may contain
  1.1242 + * filters or compounds, which we do not understand.  Caller should
  1.1243 + * make aliasReturn empty before calling.
  1.1244 + *
  1.1245 + * The entry object is assumed to reside in the dynamic store.  It may be
  1.1246 + * modified.
  1.1247 + */
  1.1248 +Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
  1.1249 +                                                         TransliteratorEntry *entry,
  1.1250 +                                                         TransliteratorAlias* &aliasReturn,
  1.1251 +                                                         UErrorCode& status) {
  1.1252 +    Transliterator *t = 0;
  1.1253 +    U_ASSERT(aliasReturn == 0);
  1.1254 +
  1.1255 +    switch (entry->entryType) {
  1.1256 +    case TransliteratorEntry::RBT_DATA:
  1.1257 +        t = new RuleBasedTransliterator(ID, entry->u.data);
  1.1258 +        if (t == 0) {
  1.1259 +            status = U_MEMORY_ALLOCATION_ERROR;
  1.1260 +        }
  1.1261 +        return t;
  1.1262 +    case TransliteratorEntry::PROTOTYPE:
  1.1263 +        t = entry->u.prototype->clone();
  1.1264 +        if (t == 0) {
  1.1265 +            status = U_MEMORY_ALLOCATION_ERROR;
  1.1266 +        }
  1.1267 +        return t;
  1.1268 +    case TransliteratorEntry::ALIAS:
  1.1269 +        aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
  1.1270 +        if (aliasReturn == 0) {
  1.1271 +            status = U_MEMORY_ALLOCATION_ERROR;
  1.1272 +        }
  1.1273 +        return 0;
  1.1274 +    case TransliteratorEntry::FACTORY:
  1.1275 +        t = entry->u.factory.function(ID, entry->u.factory.context);
  1.1276 +        if (t == 0) {
  1.1277 +            status = U_MEMORY_ALLOCATION_ERROR;
  1.1278 +        }
  1.1279 +        return t;
  1.1280 +    case TransliteratorEntry::COMPOUND_RBT:
  1.1281 +        {
  1.1282 +            UVector* rbts = new UVector(entry->u.dataVector->size(), status);
  1.1283 +            // Check for null pointer
  1.1284 +            if (rbts == NULL) {
  1.1285 +            	status = U_MEMORY_ALLOCATION_ERROR;
  1.1286 +            	return NULL;
  1.1287 +            }
  1.1288 +            int32_t passNumber = 1;
  1.1289 +            for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
  1.1290 +                // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
  1.1291 +                Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
  1.1292 +                    (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
  1.1293 +                if (t == 0)
  1.1294 +                    status = U_MEMORY_ALLOCATION_ERROR;
  1.1295 +                else
  1.1296 +                    rbts->addElement(t, status);
  1.1297 +            }
  1.1298 +            if (U_FAILURE(status)) {
  1.1299 +                delete rbts;
  1.1300 +                return 0;
  1.1301 +            }
  1.1302 +            aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
  1.1303 +        }
  1.1304 +        if (aliasReturn == 0) {
  1.1305 +            status = U_MEMORY_ALLOCATION_ERROR;
  1.1306 +        }
  1.1307 +        return 0;
  1.1308 +    case TransliteratorEntry::LOCALE_RULES:
  1.1309 +        aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
  1.1310 +                                              (UTransDirection) entry->intArg);
  1.1311 +        if (aliasReturn == 0) {
  1.1312 +            status = U_MEMORY_ALLOCATION_ERROR;
  1.1313 +        }
  1.1314 +        return 0;
  1.1315 +    case TransliteratorEntry::RULES_FORWARD:
  1.1316 +    case TransliteratorEntry::RULES_REVERSE:
  1.1317 +        // Process the rule data into a TransliteratorRuleData object,
  1.1318 +        // and possibly also into an ::id header and/or footer.  Then
  1.1319 +        // we modify the registry with the parsed data and retry.
  1.1320 +        {
  1.1321 +            TransliteratorParser parser(status);
  1.1322 +            
  1.1323 +            // We use the file name, taken from another resource bundle
  1.1324 +            // 2-d array at static init time, as a locale language.  We're
  1.1325 +            // just using the locale mechanism to map through to a file
  1.1326 +            // name; this in no way represents an actual locale.
  1.1327 +            //CharString ch(entry->stringArg);
  1.1328 +            //UResourceBundle *bundle = ures_openDirect(0, ch, &status);
  1.1329 +            UnicodeString rules = entry->stringArg;
  1.1330 +            //ures_close(bundle);
  1.1331 +            
  1.1332 +            //if (U_FAILURE(status)) {
  1.1333 +                // We have a failure of some kind.  Remove the ID from the
  1.1334 +                // registry so we don't keep trying.  NOTE: This will throw off
  1.1335 +                // anyone who is, at the moment, trying to iterate over the
  1.1336 +                // available IDs.  That's acceptable since we should never
  1.1337 +                // really get here except under installation, configuration,
  1.1338 +                // or unrecoverable run time memory failures.
  1.1339 +            //    remove(ID);
  1.1340 +            //} else {
  1.1341 +                
  1.1342 +                // If the status indicates a failure, then we don't have any
  1.1343 +                // rules -- there is probably an installation error.  The list
  1.1344 +                // in the root locale should correspond to all the installed
  1.1345 +                // transliterators; if it lists something that's not
  1.1346 +                // installed, we'll get an error from ResourceBundle.
  1.1347 +                aliasReturn = new TransliteratorAlias(ID, rules,
  1.1348 +                    ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
  1.1349 +                     UTRANS_REVERSE : UTRANS_FORWARD));
  1.1350 +                if (aliasReturn == 0) {
  1.1351 +                    status = U_MEMORY_ALLOCATION_ERROR;
  1.1352 +                }
  1.1353 +            //}
  1.1354 +        }
  1.1355 +        return 0;
  1.1356 +    default:
  1.1357 +        U_ASSERT(FALSE); // can't get here
  1.1358 +        return 0;
  1.1359 +    }
  1.1360 +}
  1.1361 +U_NAMESPACE_END
  1.1362 +
  1.1363 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
  1.1364 +
  1.1365 +//eof

mercurial