michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2001-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 08/10/2001 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: michael@0: #include "unicode/translit.h" michael@0: #include "unicode/resbund.h" michael@0: #include "unicode/uniset.h" michael@0: #include "unicode/uscript.h" michael@0: #include "rbt.h" michael@0: #include "cpdtrans.h" michael@0: #include "nultrans.h" michael@0: #include "transreg.h" michael@0: #include "rbt_data.h" michael@0: #include "rbt_pars.h" michael@0: #include "tridpars.h" michael@0: #include "charstr.h" michael@0: #include "uassert.h" michael@0: #include "locutil.h" michael@0: michael@0: // Enable the following symbol to add debugging code that tracks the michael@0: // allocation, deletion, and use of Entry objects. BoundsChecker has michael@0: // reported dangling pointer errors with these objects, but I have michael@0: // been unable to confirm them. I suspect BoundsChecker is getting michael@0: // confused with pointers going into and coming out of a UHashtable, michael@0: // despite the hinting code that is designed to help it. michael@0: // #define DEBUG_MEM michael@0: #ifdef DEBUG_MEM michael@0: #include michael@0: #endif michael@0: michael@0: // UChar constants michael@0: static const UChar LOCALE_SEP = 95; // '_' michael@0: //static const UChar ID_SEP = 0x002D; /*-*/ michael@0: //static const UChar VARIANT_SEP = 0x002F; // '/' michael@0: michael@0: // String constants michael@0: static const UChar ANY[] = { 65, 110, 121, 0 }; // Any michael@0: michael@0: // empty string michael@0: #define NO_VARIANT UnicodeString() michael@0: michael@0: /** michael@0: * Resource bundle key for the RuleBasedTransliterator rule. michael@0: */ michael@0: //static const char RB_RULE[] = "Rule"; michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: //------------------------------------------------------------------ michael@0: // Alias michael@0: //------------------------------------------------------------------ michael@0: michael@0: TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID, michael@0: const UnicodeSet* cpdFilter) : michael@0: ID(), michael@0: aliasesOrRules(theAliasID), michael@0: transes(0), michael@0: compoundFilter(cpdFilter), michael@0: direction(UTRANS_FORWARD), michael@0: type(TransliteratorAlias::SIMPLE) { michael@0: } michael@0: michael@0: TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, michael@0: const UnicodeString& idBlocks, michael@0: UVector* adoptedTransliterators, michael@0: const UnicodeSet* cpdFilter) : michael@0: ID(theID), michael@0: aliasesOrRules(idBlocks), michael@0: transes(adoptedTransliterators), michael@0: compoundFilter(cpdFilter), michael@0: direction(UTRANS_FORWARD), michael@0: type(TransliteratorAlias::COMPOUND) { michael@0: } michael@0: michael@0: TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, michael@0: const UnicodeString& rules, michael@0: UTransDirection dir) : michael@0: ID(theID), michael@0: aliasesOrRules(rules), michael@0: transes(0), michael@0: compoundFilter(0), michael@0: direction(dir), michael@0: type(TransliteratorAlias::RULES) { michael@0: } michael@0: michael@0: TransliteratorAlias::~TransliteratorAlias() { michael@0: delete transes; michael@0: } michael@0: michael@0: michael@0: Transliterator* TransliteratorAlias::create(UParseError& pe, michael@0: UErrorCode& ec) { michael@0: if (U_FAILURE(ec)) { michael@0: return 0; michael@0: } michael@0: Transliterator *t = NULL; michael@0: switch (type) { michael@0: case SIMPLE: michael@0: t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec); michael@0: if(U_FAILURE(ec)){ michael@0: return 0; michael@0: } michael@0: if (compoundFilter != 0) michael@0: t->adoptFilter((UnicodeSet*)compoundFilter->clone()); michael@0: break; michael@0: case COMPOUND: michael@0: { michael@0: // the total number of transliterators in the compound is the total number of anonymous transliterators michael@0: // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID michael@0: // block and that each pair anonymous transliterators has an ID block between them. Then we go back michael@0: // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which michael@0: // marks the position where an anonymous transliterator goes) and adjust accordingly michael@0: int32_t anonymousRBTs = transes->size(); michael@0: int32_t transCount = anonymousRBTs * 2 + 1; michael@0: if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff)) michael@0: --transCount; michael@0: if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff)) michael@0: --transCount; michael@0: UnicodeString noIDBlock((UChar)(0xffff)); michael@0: noIDBlock += ((UChar)(0xffff)); michael@0: int32_t pos = aliasesOrRules.indexOf(noIDBlock); michael@0: while (pos >= 0) { michael@0: --transCount; michael@0: pos = aliasesOrRules.indexOf(noIDBlock, pos + 1); michael@0: } michael@0: michael@0: UVector transliterators(ec); michael@0: UnicodeString idBlock; michael@0: int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); michael@0: while (blockSeparatorPos >= 0) { michael@0: aliasesOrRules.extract(0, blockSeparatorPos, idBlock); michael@0: aliasesOrRules.remove(0, blockSeparatorPos + 1); michael@0: if (!idBlock.isEmpty()) michael@0: transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec); michael@0: if (!transes->isEmpty()) michael@0: transliterators.addElement(transes->orphanElementAt(0), ec); michael@0: blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); michael@0: } michael@0: if (!aliasesOrRules.isEmpty()) michael@0: transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec); michael@0: while (!transes->isEmpty()) michael@0: transliterators.addElement(transes->orphanElementAt(0), ec); michael@0: michael@0: if (U_SUCCESS(ec)) { michael@0: t = new CompoundTransliterator(ID, transliterators, michael@0: (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0), michael@0: anonymousRBTs, pe, ec); michael@0: if (t == 0) { michael@0: ec = U_MEMORY_ALLOCATION_ERROR; michael@0: return 0; michael@0: } michael@0: } else { michael@0: for (int32_t i = 0; i < transliterators.size(); i++) michael@0: delete (Transliterator*)(transliterators.elementAt(i)); michael@0: } michael@0: } michael@0: break; michael@0: case RULES: michael@0: U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE! michael@0: break; michael@0: } michael@0: return t; michael@0: } michael@0: michael@0: UBool TransliteratorAlias::isRuleBased() const { michael@0: return type == RULES; michael@0: } michael@0: michael@0: void TransliteratorAlias::parse(TransliteratorParser& parser, michael@0: UParseError& pe, UErrorCode& ec) const { michael@0: U_ASSERT(type == RULES); michael@0: if (U_FAILURE(ec)) { michael@0: return; michael@0: } michael@0: michael@0: parser.parse(aliasesOrRules, direction, pe, ec); michael@0: } michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // class TransliteratorSpec michael@0: //---------------------------------------------------------------------- michael@0: michael@0: /** michael@0: * A TransliteratorSpec is a string specifying either a source or a target. In more michael@0: * general terms, it may also specify a variant, but we only use the michael@0: * Spec class for sources and targets. michael@0: * michael@0: * A Spec may be a locale or a script. If it is a locale, it has a michael@0: * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where michael@0: * ssss is the script mapping of xx_YY_ZZZ. The Spec API methods michael@0: * hasFallback(), next(), and reset() iterate over this fallback michael@0: * sequence. michael@0: * michael@0: * The Spec class canonicalizes itself, so the locale is put into michael@0: * canonical form, or the script is transformed from an abbreviation michael@0: * to a full name. michael@0: */ michael@0: class TransliteratorSpec : public UMemory { michael@0: public: michael@0: TransliteratorSpec(const UnicodeString& spec); michael@0: ~TransliteratorSpec(); michael@0: michael@0: const UnicodeString& get() const; michael@0: UBool hasFallback() const; michael@0: const UnicodeString& next(); michael@0: void reset(); michael@0: michael@0: UBool isLocale() const; michael@0: ResourceBundle& getBundle() const; michael@0: michael@0: operator const UnicodeString&() const { return get(); } michael@0: const UnicodeString& getTop() const { return top; } michael@0: michael@0: private: michael@0: void setupNext(); michael@0: michael@0: UnicodeString top; michael@0: UnicodeString spec; michael@0: UnicodeString nextSpec; michael@0: UnicodeString scriptName; michael@0: UBool isSpecLocale; // TRUE if spec is a locale michael@0: UBool isNextLocale; // TRUE if nextSpec is a locale michael@0: ResourceBundle* res; michael@0: michael@0: TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class michael@0: TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class michael@0: }; michael@0: michael@0: TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec) michael@0: : top(theSpec), michael@0: res(0) michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: Locale topLoc(""); michael@0: LocaleUtility::initLocaleFromName(theSpec, topLoc); michael@0: if (!topLoc.isBogus()) { michael@0: res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status); michael@0: /* test for NULL */ michael@0: if (res == 0) { michael@0: return; michael@0: } michael@0: if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { michael@0: delete res; michael@0: res = 0; michael@0: } michael@0: } michael@0: michael@0: // Canonicalize script name -or- do locale->script mapping michael@0: status = U_ZERO_ERROR; michael@0: static const int32_t capacity = 10; michael@0: UScriptCode script[capacity]={USCRIPT_INVALID_CODE}; michael@0: int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(), michael@0: script, capacity, &status); michael@0: if (num > 0 && script[0] != USCRIPT_INVALID_CODE) { michael@0: scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV); michael@0: } michael@0: michael@0: // Canonicalize top michael@0: if (res != 0) { michael@0: // Canonicalize locale name michael@0: UnicodeString locStr; michael@0: LocaleUtility::initNameFromLocale(topLoc, locStr); michael@0: if (!locStr.isBogus()) { michael@0: top = locStr; michael@0: } michael@0: } else if (scriptName.length() != 0) { michael@0: // We are a script; use canonical name michael@0: top = scriptName; michael@0: } michael@0: michael@0: // assert(spec != top); michael@0: reset(); michael@0: } michael@0: michael@0: TransliteratorSpec::~TransliteratorSpec() { michael@0: delete res; michael@0: } michael@0: michael@0: UBool TransliteratorSpec::hasFallback() const { michael@0: return nextSpec.length() != 0; michael@0: } michael@0: michael@0: void TransliteratorSpec::reset() { michael@0: if (spec != top) { michael@0: spec = top; michael@0: isSpecLocale = (res != 0); michael@0: setupNext(); michael@0: } michael@0: } michael@0: michael@0: void TransliteratorSpec::setupNext() { michael@0: isNextLocale = FALSE; michael@0: if (isSpecLocale) { michael@0: nextSpec = spec; michael@0: int32_t i = nextSpec.lastIndexOf(LOCALE_SEP); michael@0: // If i == 0 then we have _FOO, so we fall through michael@0: // to the scriptName. michael@0: if (i > 0) { michael@0: nextSpec.truncate(i); michael@0: isNextLocale = TRUE; michael@0: } else { michael@0: nextSpec = scriptName; // scriptName may be empty michael@0: } michael@0: } else { michael@0: // spec is a script, so we are at the end michael@0: nextSpec.truncate(0); michael@0: } michael@0: } michael@0: michael@0: // Protocol: michael@0: // for(const UnicodeString& s(spec.get()); michael@0: // spec.hasFallback(); s(spec.next())) { ... michael@0: michael@0: const UnicodeString& TransliteratorSpec::next() { michael@0: spec = nextSpec; michael@0: isSpecLocale = isNextLocale; michael@0: setupNext(); michael@0: return spec; michael@0: } michael@0: michael@0: const UnicodeString& TransliteratorSpec::get() const { michael@0: return spec; michael@0: } michael@0: michael@0: UBool TransliteratorSpec::isLocale() const { michael@0: return isSpecLocale; michael@0: } michael@0: michael@0: ResourceBundle& TransliteratorSpec::getBundle() const { michael@0: return *res; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------- michael@0: michael@0: #ifdef DEBUG_MEM michael@0: michael@0: // Vector of Entry pointers currently in use michael@0: static UVector* DEBUG_entries = NULL; michael@0: michael@0: static void DEBUG_setup() { michael@0: if (DEBUG_entries == NULL) { michael@0: UErrorCode ec = U_ZERO_ERROR; michael@0: DEBUG_entries = new UVector(ec); michael@0: } michael@0: } michael@0: michael@0: // Caller must call DEBUG_setup first. Return index of given Entry, michael@0: // if it is in use (not deleted yet), or -1 if not found. michael@0: static int DEBUG_findEntry(TransliteratorEntry* e) { michael@0: for (int i=0; isize(); ++i) { michael@0: if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) { michael@0: return i; michael@0: } michael@0: } michael@0: return -1; michael@0: } michael@0: michael@0: // Track object creation michael@0: static void DEBUG_newEntry(TransliteratorEntry* e) { michael@0: DEBUG_setup(); michael@0: if (DEBUG_findEntry(e) >= 0) { michael@0: // This should really never happen unless the heap is broken michael@0: printf("ERROR DEBUG_newEntry duplicate new pointer %08X\n", e); michael@0: return; michael@0: } michael@0: UErrorCode ec = U_ZERO_ERROR; michael@0: DEBUG_entries->addElement(e, ec); michael@0: } michael@0: michael@0: // Track object deletion michael@0: static void DEBUG_delEntry(TransliteratorEntry* e) { michael@0: DEBUG_setup(); michael@0: int i = DEBUG_findEntry(e); michael@0: if (i < 0) { michael@0: printf("ERROR DEBUG_delEntry possible double deletion %08X\n", e); michael@0: return; michael@0: } michael@0: DEBUG_entries->removeElementAt(i); michael@0: } michael@0: michael@0: // Track object usage michael@0: static void DEBUG_useEntry(TransliteratorEntry* e) { michael@0: if (e == NULL) return; michael@0: DEBUG_setup(); michael@0: int i = DEBUG_findEntry(e); michael@0: if (i < 0) { michael@0: printf("ERROR DEBUG_useEntry possible dangling pointer %08X\n", e); michael@0: } michael@0: } michael@0: michael@0: #else michael@0: // If we're not debugging then make these macros into NOPs michael@0: #define DEBUG_newEntry(x) michael@0: #define DEBUG_delEntry(x) michael@0: #define DEBUG_useEntry(x) michael@0: #endif michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // class Entry michael@0: //---------------------------------------------------------------------- michael@0: michael@0: /** michael@0: * The Entry object stores objects of different types and michael@0: * singleton objects as placeholders for rule-based transliterators to michael@0: * be built as needed. Instances of this struct can be placeholders, michael@0: * can represent prototype transliterators to be cloned, or can michael@0: * represent TransliteratorData objects. We don't support storing michael@0: * classes in the registry because we don't have the rtti infrastructure michael@0: * for it. We could easily add this if there is a need for it in the michael@0: * future. michael@0: */ michael@0: class TransliteratorEntry : public UMemory { michael@0: public: michael@0: enum Type { michael@0: RULES_FORWARD, michael@0: RULES_REVERSE, michael@0: LOCALE_RULES, michael@0: PROTOTYPE, michael@0: RBT_DATA, michael@0: COMPOUND_RBT, michael@0: ALIAS, michael@0: FACTORY, michael@0: NONE // Only used for uninitialized entries michael@0: } entryType; michael@0: // NOTE: stringArg cannot go inside the union because michael@0: // it has a copy constructor michael@0: UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT michael@0: int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES michael@0: UnicodeSet* compoundFilter; // For COMPOUND_RBT michael@0: union { michael@0: Transliterator* prototype; // For PROTOTYPE michael@0: TransliterationRuleData* data; // For RBT_DATA michael@0: UVector* dataVector; // For COMPOUND_RBT michael@0: struct { michael@0: Transliterator::Factory function; michael@0: Transliterator::Token context; michael@0: } factory; // For FACTORY michael@0: } u; michael@0: TransliteratorEntry(); michael@0: ~TransliteratorEntry(); michael@0: void adoptPrototype(Transliterator* adopted); michael@0: void setFactory(Transliterator::Factory factory, michael@0: Transliterator::Token context); michael@0: michael@0: private: michael@0: michael@0: TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class michael@0: TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class michael@0: }; michael@0: michael@0: TransliteratorEntry::TransliteratorEntry() { michael@0: u.prototype = 0; michael@0: compoundFilter = NULL; michael@0: entryType = NONE; michael@0: DEBUG_newEntry(this); michael@0: } michael@0: michael@0: TransliteratorEntry::~TransliteratorEntry() { michael@0: DEBUG_delEntry(this); michael@0: if (entryType == PROTOTYPE) { michael@0: delete u.prototype; michael@0: } else if (entryType == RBT_DATA) { michael@0: // The data object is shared between instances of RBT. The michael@0: // entry object owns it. It should only be deleted when the michael@0: // transliterator component is being cleaned up. Doing so michael@0: // invalidates any RBTs that the user has instantiated. michael@0: delete u.data; michael@0: } else if (entryType == COMPOUND_RBT) { michael@0: while (u.dataVector != NULL && !u.dataVector->isEmpty()) michael@0: delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0); michael@0: delete u.dataVector; michael@0: } michael@0: delete compoundFilter; michael@0: } michael@0: michael@0: void TransliteratorEntry::adoptPrototype(Transliterator* adopted) { michael@0: if (entryType == PROTOTYPE) { michael@0: delete u.prototype; michael@0: } michael@0: entryType = PROTOTYPE; michael@0: u.prototype = adopted; michael@0: } michael@0: michael@0: void TransliteratorEntry::setFactory(Transliterator::Factory factory, michael@0: Transliterator::Token context) { michael@0: if (entryType == PROTOTYPE) { michael@0: delete u.prototype; michael@0: } michael@0: entryType = FACTORY; michael@0: u.factory.function = factory; michael@0: u.factory.context = context; michael@0: } michael@0: michael@0: // UObjectDeleter for Hashtable::setValueDeleter michael@0: U_CDECL_BEGIN michael@0: static void U_CALLCONV michael@0: deleteEntry(void* obj) { michael@0: delete (TransliteratorEntry*) obj; michael@0: } michael@0: U_CDECL_END michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // class TransliteratorRegistry: Basic public API michael@0: //---------------------------------------------------------------------- michael@0: michael@0: TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : michael@0: registry(TRUE, status), michael@0: specDAG(TRUE, status), michael@0: availableIDs(status) michael@0: { michael@0: registry.setValueDeleter(deleteEntry); michael@0: availableIDs.setDeleter(uprv_deleteUObject); michael@0: availableIDs.setComparer(uhash_compareCaselessUnicodeString); michael@0: specDAG.setValueDeleter(uhash_deleteHashtable); michael@0: } michael@0: michael@0: TransliteratorRegistry::~TransliteratorRegistry() { michael@0: // Through the magic of C++, everything cleans itself up michael@0: } michael@0: michael@0: Transliterator* TransliteratorRegistry::get(const UnicodeString& ID, michael@0: TransliteratorAlias*& aliasReturn, michael@0: UErrorCode& status) { michael@0: U_ASSERT(aliasReturn == NULL); michael@0: TransliteratorEntry *entry = find(ID); michael@0: return (entry == 0) ? 0 michael@0: : instantiateEntry(ID, entry, aliasReturn, status); michael@0: } michael@0: michael@0: Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, michael@0: TransliteratorParser& parser, michael@0: TransliteratorAlias*& aliasReturn, michael@0: UErrorCode& status) { michael@0: U_ASSERT(aliasReturn == NULL); michael@0: TransliteratorEntry *entry = find(ID); michael@0: michael@0: if (entry == 0) { michael@0: // We get to this point if there are two threads, one of which michael@0: // is instantiating an ID, and another of which is removing michael@0: // the same ID from the registry, and the timing is just right. michael@0: return 0; michael@0: } michael@0: michael@0: // The usage model for the caller is that they will first call michael@0: // reg->get() inside the mutex, they'll get back an alias, they call michael@0: // alias->isRuleBased(), and if they get TRUE, they call alias->parse() michael@0: // outside the mutex, then reg->reget() inside the mutex again. A real michael@0: // mess, but it gets things working for ICU 3.0. [alan]. michael@0: michael@0: // Note: It's possible that in between the caller calling michael@0: // alias->parse() and reg->reget(), that another thread will have michael@0: // called reg->reget(), and the entry will already have been fixed up. michael@0: // We have to detect this so we don't stomp over existing entry michael@0: // data members and potentially leak memory (u.data and compoundFilter). michael@0: michael@0: if (entry->entryType == TransliteratorEntry::RULES_FORWARD || michael@0: entry->entryType == TransliteratorEntry::RULES_REVERSE || michael@0: entry->entryType == TransliteratorEntry::LOCALE_RULES) { michael@0: michael@0: if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) { michael@0: entry->u.data = 0; michael@0: entry->entryType = TransliteratorEntry::ALIAS; michael@0: entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL"); michael@0: } michael@0: else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) { michael@0: entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); michael@0: entry->entryType = TransliteratorEntry::RBT_DATA; michael@0: } michael@0: else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) { michael@0: entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0)); michael@0: entry->compoundFilter = parser.orphanCompoundFilter(); michael@0: entry->entryType = TransliteratorEntry::ALIAS; michael@0: } michael@0: else { michael@0: entry->entryType = TransliteratorEntry::COMPOUND_RBT; michael@0: entry->compoundFilter = parser.orphanCompoundFilter(); michael@0: entry->u.dataVector = new UVector(status); michael@0: entry->stringArg.remove(); michael@0: michael@0: int32_t limit = parser.idBlockVector.size(); michael@0: if (parser.dataVector.size() > limit) michael@0: limit = parser.dataVector.size(); michael@0: michael@0: for (int32_t i = 0; i < limit; i++) { michael@0: if (i < parser.idBlockVector.size()) { michael@0: UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); michael@0: if (!idBlock->isEmpty()) michael@0: entry->stringArg += *idBlock; michael@0: } michael@0: if (!parser.dataVector.isEmpty()) { michael@0: TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); michael@0: entry->u.dataVector->addElement(data, status); michael@0: entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: Transliterator *t = michael@0: instantiateEntry(ID, entry, aliasReturn, status); michael@0: return t; michael@0: } michael@0: michael@0: void TransliteratorRegistry::put(Transliterator* adoptedProto, michael@0: UBool visible, michael@0: UErrorCode& ec) michael@0: { michael@0: TransliteratorEntry *entry = new TransliteratorEntry(); michael@0: if (entry == NULL) { michael@0: ec = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: entry->adoptPrototype(adoptedProto); michael@0: registerEntry(adoptedProto->getID(), entry, visible); michael@0: } michael@0: michael@0: void TransliteratorRegistry::put(const UnicodeString& ID, michael@0: Transliterator::Factory factory, michael@0: Transliterator::Token context, michael@0: UBool visible, michael@0: UErrorCode& ec) { michael@0: TransliteratorEntry *entry = new TransliteratorEntry(); michael@0: if (entry == NULL) { michael@0: ec = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: entry->setFactory(factory, context); michael@0: registerEntry(ID, entry, visible); michael@0: } michael@0: michael@0: void TransliteratorRegistry::put(const UnicodeString& ID, michael@0: const UnicodeString& resourceName, michael@0: UTransDirection dir, michael@0: UBool readonlyResourceAlias, michael@0: UBool visible, michael@0: UErrorCode& ec) { michael@0: TransliteratorEntry *entry = new TransliteratorEntry(); michael@0: if (entry == NULL) { michael@0: ec = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD michael@0: : TransliteratorEntry::RULES_REVERSE; michael@0: if (readonlyResourceAlias) { michael@0: entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1); michael@0: } michael@0: else { michael@0: entry->stringArg = resourceName; michael@0: } michael@0: registerEntry(ID, entry, visible); michael@0: } michael@0: michael@0: void TransliteratorRegistry::put(const UnicodeString& ID, michael@0: const UnicodeString& alias, michael@0: UBool readonlyAliasAlias, michael@0: UBool visible, michael@0: UErrorCode& /*ec*/) { michael@0: TransliteratorEntry *entry = new TransliteratorEntry(); michael@0: // Null pointer check michael@0: if (entry != NULL) { michael@0: entry->entryType = TransliteratorEntry::ALIAS; michael@0: if (readonlyAliasAlias) { michael@0: entry->stringArg.setTo(TRUE, alias.getBuffer(), -1); michael@0: } michael@0: else { michael@0: entry->stringArg = alias; michael@0: } michael@0: registerEntry(ID, entry, visible); michael@0: } michael@0: } michael@0: michael@0: void TransliteratorRegistry::remove(const UnicodeString& ID) { michael@0: UnicodeString source, target, variant; michael@0: UBool sawSource; michael@0: TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); michael@0: // Only need to do this if ID.indexOf('-') < 0 michael@0: UnicodeString id; michael@0: TransliteratorIDParser::STVtoID(source, target, variant, id); michael@0: registry.remove(id); michael@0: removeSTV(source, target, variant); michael@0: availableIDs.removeElement((void*) &id); michael@0: } michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // class TransliteratorRegistry: Public ID and spec management michael@0: //---------------------------------------------------------------------- michael@0: michael@0: /** michael@0: * == OBSOLETE - remove in ICU 3.4 == michael@0: * Return the number of IDs currently registered with the system. michael@0: * To retrieve the actual IDs, call getAvailableID(i) with michael@0: * i from 0 to countAvailableIDs() - 1. michael@0: */ michael@0: int32_t TransliteratorRegistry::countAvailableIDs(void) const { michael@0: return availableIDs.size(); michael@0: } michael@0: michael@0: /** michael@0: * == OBSOLETE - remove in ICU 3.4 == michael@0: * Return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. michael@0: */ michael@0: const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const { michael@0: if (index < 0 || index >= availableIDs.size()) { michael@0: index = 0; michael@0: } michael@0: return *(const UnicodeString*) availableIDs[index]; michael@0: } michael@0: michael@0: StringEnumeration* TransliteratorRegistry::getAvailableIDs() const { michael@0: return new Enumeration(*this); michael@0: } michael@0: michael@0: int32_t TransliteratorRegistry::countAvailableSources(void) const { michael@0: return specDAG.count(); michael@0: } michael@0: michael@0: UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index, michael@0: UnicodeString& result) const { michael@0: int32_t pos = -1; michael@0: const UHashElement *e = 0; michael@0: while (index-- >= 0) { michael@0: e = specDAG.nextElement(pos); michael@0: if (e == 0) { michael@0: break; michael@0: } michael@0: } michael@0: if (e == 0) { michael@0: result.truncate(0); michael@0: } else { michael@0: result = *(UnicodeString*) e->key.pointer; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const { michael@0: Hashtable *targets = (Hashtable*) specDAG.get(source); michael@0: return (targets == 0) ? 0 : targets->count(); michael@0: } michael@0: michael@0: UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index, michael@0: const UnicodeString& source, michael@0: UnicodeString& result) const { michael@0: Hashtable *targets = (Hashtable*) specDAG.get(source); michael@0: if (targets == 0) { michael@0: result.truncate(0); // invalid source michael@0: return result; michael@0: } michael@0: int32_t pos = -1; michael@0: const UHashElement *e = 0; michael@0: while (index-- >= 0) { michael@0: e = targets->nextElement(pos); michael@0: if (e == 0) { michael@0: break; michael@0: } michael@0: } michael@0: if (e == 0) { michael@0: result.truncate(0); // invalid index michael@0: } else { michael@0: result = *(UnicodeString*) e->key.pointer; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source, michael@0: const UnicodeString& target) const { michael@0: Hashtable *targets = (Hashtable*) specDAG.get(source); michael@0: if (targets == 0) { michael@0: return 0; michael@0: } michael@0: UVector *variants = (UVector*) targets->get(target); michael@0: // variants may be 0 if the source/target are invalid michael@0: return (variants == 0) ? 0 : variants->size(); michael@0: } michael@0: michael@0: UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index, michael@0: const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: UnicodeString& result) const { michael@0: Hashtable *targets = (Hashtable*) specDAG.get(source); michael@0: if (targets == 0) { michael@0: result.truncate(0); // invalid source michael@0: return result; michael@0: } michael@0: UVector *variants = (UVector*) targets->get(target); michael@0: if (variants == 0) { michael@0: result.truncate(0); // invalid target michael@0: return result; michael@0: } michael@0: UnicodeString *v = (UnicodeString*) variants->elementAt(index); michael@0: if (v == 0) { michael@0: result.truncate(0); // invalid index michael@0: } else { michael@0: result = *v; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // class TransliteratorRegistry::Enumeration michael@0: //---------------------------------------------------------------------- michael@0: michael@0: TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) : michael@0: index(0), reg(_reg) { michael@0: } michael@0: michael@0: TransliteratorRegistry::Enumeration::~Enumeration() { michael@0: } michael@0: michael@0: int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const { michael@0: return reg.availableIDs.size(); michael@0: } michael@0: michael@0: const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) { michael@0: // This is sloppy but safe -- if we get out of sync with the underlying michael@0: // registry, we will still return legal strings, but they might not michael@0: // correspond to the snapshot at construction time. So there could be michael@0: // duplicate IDs or omitted IDs if insertions or deletions occur in one michael@0: // thread while another is iterating. To be more rigorous, add a timestamp, michael@0: // which is incremented with any modification, and validate this iterator michael@0: // against the timestamp at construction time. This probably isn't worth michael@0: // doing as long as there is some possibility of removing this code in favor michael@0: // of some new code based on Doug's service framework. michael@0: if (U_FAILURE(status)) { michael@0: return NULL; michael@0: } michael@0: int32_t n = reg.availableIDs.size(); michael@0: if (index > n) { michael@0: status = U_ENUM_OUT_OF_SYNC_ERROR; michael@0: } michael@0: // index == n is okay -- this means we've reached the end michael@0: if (index < n) { michael@0: // Copy the string! This avoids lifetime problems. michael@0: unistr = *(const UnicodeString*)reg.availableIDs[index++]; michael@0: return &unistr; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) { michael@0: index = 0; michael@0: } michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration) michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // class TransliteratorRegistry: internal michael@0: //---------------------------------------------------------------------- michael@0: michael@0: /** michael@0: * Convenience method. Calls 6-arg registerEntry(). michael@0: */ michael@0: void TransliteratorRegistry::registerEntry(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant, michael@0: TransliteratorEntry* adopted, michael@0: UBool visible) { michael@0: UnicodeString ID; michael@0: UnicodeString s(source); michael@0: if (s.length() == 0) { michael@0: s.setTo(TRUE, ANY, 3); michael@0: } michael@0: TransliteratorIDParser::STVtoID(source, target, variant, ID); michael@0: registerEntry(ID, s, target, variant, adopted, visible); michael@0: } michael@0: michael@0: /** michael@0: * Convenience method. Calls 6-arg registerEntry(). michael@0: */ michael@0: void TransliteratorRegistry::registerEntry(const UnicodeString& ID, michael@0: TransliteratorEntry* adopted, michael@0: UBool visible) { michael@0: UnicodeString source, target, variant; michael@0: UBool sawSource; michael@0: TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); michael@0: // Only need to do this if ID.indexOf('-') < 0 michael@0: UnicodeString id; michael@0: TransliteratorIDParser::STVtoID(source, target, variant, id); michael@0: registerEntry(id, source, target, variant, adopted, visible); michael@0: } michael@0: michael@0: /** michael@0: * Register an entry object (adopted) with the given ID, source, michael@0: * target, and variant strings. michael@0: */ michael@0: void TransliteratorRegistry::registerEntry(const UnicodeString& ID, michael@0: const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant, michael@0: TransliteratorEntry* adopted, michael@0: UBool visible) { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: registry.put(ID, adopted, status); michael@0: if (visible) { michael@0: registerSTV(source, target, variant); michael@0: if (!availableIDs.contains((void*) &ID)) { michael@0: UnicodeString *newID = (UnicodeString *)ID.clone(); michael@0: // Check to make sure newID was created. michael@0: if (newID != NULL) { michael@0: // NUL-terminate the ID string michael@0: newID->getTerminatedBuffer(); michael@0: availableIDs.addElement(newID, status); michael@0: } michael@0: } michael@0: } else { michael@0: removeSTV(source, target, variant); michael@0: availableIDs.removeElement((void*) &ID); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Register a source-target/variant in the specDAG. Variant may be michael@0: * empty, but source and target must not be. If variant is empty then michael@0: * the special variant NO_VARIANT is stored in slot zero of the michael@0: * UVector of variants. michael@0: */ michael@0: void TransliteratorRegistry::registerSTV(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant) { michael@0: // assert(source.length() > 0); michael@0: // assert(target.length() > 0); michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: Hashtable *targets = (Hashtable*) specDAG.get(source); michael@0: if (targets == 0) { michael@0: targets = new Hashtable(TRUE, status); michael@0: if (U_FAILURE(status) || targets == 0) { michael@0: return; michael@0: } michael@0: targets->setValueDeleter(uprv_deleteUObject); michael@0: specDAG.put(source, targets, status); michael@0: } michael@0: UVector *variants = (UVector*) targets->get(target); michael@0: if (variants == 0) { michael@0: variants = new UVector(uprv_deleteUObject, michael@0: uhash_compareCaselessUnicodeString, status); michael@0: if (variants == 0) { michael@0: return; michael@0: } michael@0: targets->put(target, variants, status); michael@0: } michael@0: // assert(NO_VARIANT == ""); michael@0: // We add the variant string. If it is the special "no variant" michael@0: // string, that is, the empty string, we add it at position zero. michael@0: if (!variants->contains((void*) &variant)) { michael@0: UnicodeString *tempus; // Used for null pointer check. michael@0: if (variant.length() > 0) { michael@0: tempus = new UnicodeString(variant); michael@0: if (tempus != NULL) { michael@0: variants->addElement(tempus, status); michael@0: } michael@0: } else { michael@0: tempus = new UnicodeString(); // = NO_VARIANT michael@0: if (tempus != NULL) { michael@0: variants->insertElementAt(tempus, 0, status); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Remove a source-target/variant from the specDAG. michael@0: */ michael@0: void TransliteratorRegistry::removeSTV(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant) { michael@0: // assert(source.length() > 0); michael@0: // assert(target.length() > 0); michael@0: // UErrorCode status = U_ZERO_ERROR; michael@0: Hashtable *targets = (Hashtable*) specDAG.get(source); michael@0: if (targets == 0) { michael@0: return; // should never happen for valid s-t/v michael@0: } michael@0: UVector *variants = (UVector*) targets->get(target); michael@0: if (variants == 0) { michael@0: return; // should never happen for valid s-t/v michael@0: } michael@0: variants->removeElement((void*) &variant); michael@0: if (variants->size() == 0) { michael@0: targets->remove(target); // should delete variants michael@0: if (targets->count() == 0) { michael@0: specDAG.remove(source); // should delete targets michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Attempt to find a source-target/variant in the dynamic registry michael@0: * store. Return 0 on failure. michael@0: * michael@0: * Caller does NOT own returned object. michael@0: */ michael@0: TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src, michael@0: const TransliteratorSpec& trg, michael@0: const UnicodeString& variant) const { michael@0: UnicodeString ID; michael@0: TransliteratorIDParser::STVtoID(src, trg, variant, ID); michael@0: TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID); michael@0: DEBUG_useEntry(e); michael@0: return e; michael@0: } michael@0: michael@0: /** michael@0: * Attempt to find a source-target/variant in the static locale michael@0: * resource store. Do not perform fallback. Return 0 on failure. michael@0: * michael@0: * On success, create a new entry object, register it in the dynamic michael@0: * store, and return a pointer to it, but do not make it public -- michael@0: * just because someone requested something, we do not expand the michael@0: * available ID list (or spec DAG). michael@0: * michael@0: * Caller does NOT own returned object. michael@0: */ michael@0: TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src, michael@0: const TransliteratorSpec& trg, michael@0: const UnicodeString& variant) { michael@0: TransliteratorEntry* entry = 0; michael@0: if (src.isLocale()) { michael@0: entry = findInBundle(src, trg, variant, UTRANS_FORWARD); michael@0: } else if (trg.isLocale()) { michael@0: entry = findInBundle(trg, src, variant, UTRANS_REVERSE); michael@0: } michael@0: michael@0: // If we found an entry, store it in the Hashtable for next michael@0: // time. michael@0: if (entry != 0) { michael@0: registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE); michael@0: } michael@0: michael@0: return entry; michael@0: } michael@0: michael@0: // As of 2.0, resource bundle keys cannot contain '_' michael@0: static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo" michael@0: michael@0: static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom" michael@0: michael@0: static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate" michael@0: michael@0: /** michael@0: * Attempt to find an entry in a single resource bundle. This is michael@0: * a one-sided lookup. findInStaticStore() performs up to two such michael@0: * lookups, one for the source, and one for the target. michael@0: * michael@0: * Do not perform fallback. Return 0 on failure. michael@0: * michael@0: * On success, create a new Entry object, populate it, and return it. michael@0: * The caller owns the returned object. michael@0: */ michael@0: TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen, michael@0: const TransliteratorSpec& specToFind, michael@0: const UnicodeString& variant, michael@0: UTransDirection direction) michael@0: { michael@0: UnicodeString utag; michael@0: UnicodeString resStr; michael@0: int32_t pass; michael@0: michael@0: for (pass=0; pass<2; ++pass) { michael@0: utag.truncate(0); michael@0: // First try either TransliteratorTo_xxx or michael@0: // TransliterateFrom_xxx, then try the bidirectional michael@0: // Transliterate_xxx. This precedence order is arbitrary michael@0: // but must be consistent and documented. michael@0: if (pass == 0) { michael@0: utag.append(direction == UTRANS_FORWARD ? michael@0: TRANSLITERATE_TO : TRANSLITERATE_FROM, -1); michael@0: } else { michael@0: utag.append(TRANSLITERATE, -1); michael@0: } michael@0: UnicodeString s(specToFind.get()); michael@0: utag.append(s.toUpper("")); michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: ResourceBundle subres(specToOpen.getBundle().get( michael@0: CharString().appendInvariantChars(utag, status).data(), status)); michael@0: if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { michael@0: continue; michael@0: } michael@0: michael@0: s.truncate(0); michael@0: if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) { michael@0: continue; michael@0: } michael@0: michael@0: if (variant.length() != 0) { michael@0: status = U_ZERO_ERROR; michael@0: resStr = subres.getStringEx( michael@0: CharString().appendInvariantChars(variant, status).data(), status); michael@0: if (U_SUCCESS(status)) { michael@0: // Exit loop successfully michael@0: break; michael@0: } michael@0: } else { michael@0: // Variant is empty, which means match the first variant listed. michael@0: status = U_ZERO_ERROR; michael@0: resStr = subres.getStringEx(1, status); michael@0: if (U_SUCCESS(status)) { michael@0: // Exit loop successfully michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (pass==2) { michael@0: // Failed michael@0: return NULL; michael@0: } michael@0: michael@0: // We have succeeded in loading a string from the locale michael@0: // resources. Create a new registry entry to hold it and return it. michael@0: TransliteratorEntry *entry = new TransliteratorEntry(); michael@0: if (entry != 0) { michael@0: // The direction is always forward for the michael@0: // TransliterateTo_xxx and TransliterateFrom_xxx michael@0: // items; those are unidirectional forward rules. michael@0: // For the bidirectional Transliterate_xxx items, michael@0: // the direction is the value passed in to this michael@0: // function. michael@0: int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction; michael@0: entry->entryType = TransliteratorEntry::LOCALE_RULES; michael@0: entry->stringArg = resStr; michael@0: entry->intArg = dir; michael@0: } michael@0: michael@0: return entry; michael@0: } michael@0: michael@0: /** michael@0: * Convenience method. Calls 3-arg find(). michael@0: */ michael@0: TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) { michael@0: UnicodeString source, target, variant; michael@0: UBool sawSource; michael@0: TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); michael@0: return find(source, target, variant); michael@0: } michael@0: michael@0: /** michael@0: * Top-level find method. Attempt to find a source-target/variant in michael@0: * either the dynamic or the static (locale resource) store. Perform michael@0: * fallback. michael@0: * michael@0: * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v: michael@0: * michael@0: * ss_SS_SSS-tt_TT_TTT/v -- in hashtable michael@0: * ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback) michael@0: * michael@0: * repeat with t = tt_TT_TTT, tt_TT, tt, and tscript michael@0: * michael@0: * ss_SS_SSS-t/ * michael@0: * ss_SS-t/ * michael@0: * ss-t/ * michael@0: * sscript-t/ * michael@0: * michael@0: * Here * matches the first variant listed. michael@0: * michael@0: * Caller does NOT own returned object. Return 0 on failure. michael@0: */ michael@0: TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source, michael@0: UnicodeString& target, michael@0: UnicodeString& variant) { michael@0: michael@0: TransliteratorSpec src(source); michael@0: TransliteratorSpec trg(target); michael@0: TransliteratorEntry* entry; michael@0: michael@0: // Seek exact match in hashtable. Temporary fix for ICU 4.6. michael@0: // TODO: The general logic for finding a matching transliterator needs to be reviewed. michael@0: // ICU ticket #8089 michael@0: UnicodeString ID; michael@0: TransliteratorIDParser::STVtoID(source, target, variant, ID); michael@0: entry = (TransliteratorEntry*) registry.get(ID); michael@0: if (entry != 0) { michael@0: // std::string ss; michael@0: // std::cout << ID.toUTF8String(ss) << std::endl; michael@0: return entry; michael@0: } michael@0: michael@0: if (variant.length() != 0) { michael@0: michael@0: // Seek exact match in hashtable michael@0: entry = findInDynamicStore(src, trg, variant); michael@0: if (entry != 0) { michael@0: return entry; michael@0: } michael@0: michael@0: // Seek exact match in locale resources michael@0: entry = findInStaticStore(src, trg, variant); michael@0: if (entry != 0) { michael@0: return entry; michael@0: } michael@0: } michael@0: michael@0: for (;;) { michael@0: src.reset(); michael@0: for (;;) { michael@0: // Seek match in hashtable michael@0: entry = findInDynamicStore(src, trg, NO_VARIANT); michael@0: if (entry != 0) { michael@0: return entry; michael@0: } michael@0: michael@0: // Seek match in locale resources michael@0: entry = findInStaticStore(src, trg, NO_VARIANT); michael@0: if (entry != 0) { michael@0: return entry; michael@0: } michael@0: if (!src.hasFallback()) { michael@0: break; michael@0: } michael@0: src.next(); michael@0: } michael@0: if (!trg.hasFallback()) { michael@0: break; michael@0: } michael@0: trg.next(); michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: /** michael@0: * Given an Entry object, instantiate it. Caller owns result. Return michael@0: * 0 on failure. michael@0: * michael@0: * Return a non-empty aliasReturn value if the ID points to an alias. michael@0: * We cannot instantiate it ourselves because the alias may contain michael@0: * filters or compounds, which we do not understand. Caller should michael@0: * make aliasReturn empty before calling. michael@0: * michael@0: * The entry object is assumed to reside in the dynamic store. It may be michael@0: * modified. michael@0: */ michael@0: Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID, michael@0: TransliteratorEntry *entry, michael@0: TransliteratorAlias* &aliasReturn, michael@0: UErrorCode& status) { michael@0: Transliterator *t = 0; michael@0: U_ASSERT(aliasReturn == 0); michael@0: michael@0: switch (entry->entryType) { michael@0: case TransliteratorEntry::RBT_DATA: michael@0: t = new RuleBasedTransliterator(ID, entry->u.data); michael@0: if (t == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return t; michael@0: case TransliteratorEntry::PROTOTYPE: michael@0: t = entry->u.prototype->clone(); michael@0: if (t == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return t; michael@0: case TransliteratorEntry::ALIAS: michael@0: aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter); michael@0: if (aliasReturn == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return 0; michael@0: case TransliteratorEntry::FACTORY: michael@0: t = entry->u.factory.function(ID, entry->u.factory.context); michael@0: if (t == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return t; michael@0: case TransliteratorEntry::COMPOUND_RBT: michael@0: { michael@0: UVector* rbts = new UVector(entry->u.dataVector->size(), status); michael@0: // Check for null pointer michael@0: if (rbts == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: int32_t passNumber = 1; michael@0: for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) { michael@0: // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? michael@0: Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), michael@0: (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE); michael@0: if (t == 0) michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: else michael@0: rbts->addElement(t, status); michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: delete rbts; michael@0: return 0; michael@0: } michael@0: aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter); michael@0: } michael@0: if (aliasReturn == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return 0; michael@0: case TransliteratorEntry::LOCALE_RULES: michael@0: aliasReturn = new TransliteratorAlias(ID, entry->stringArg, michael@0: (UTransDirection) entry->intArg); michael@0: if (aliasReturn == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return 0; michael@0: case TransliteratorEntry::RULES_FORWARD: michael@0: case TransliteratorEntry::RULES_REVERSE: michael@0: // Process the rule data into a TransliteratorRuleData object, michael@0: // and possibly also into an ::id header and/or footer. Then michael@0: // we modify the registry with the parsed data and retry. michael@0: { michael@0: TransliteratorParser parser(status); michael@0: michael@0: // We use the file name, taken from another resource bundle michael@0: // 2-d array at static init time, as a locale language. We're michael@0: // just using the locale mechanism to map through to a file michael@0: // name; this in no way represents an actual locale. michael@0: //CharString ch(entry->stringArg); michael@0: //UResourceBundle *bundle = ures_openDirect(0, ch, &status); michael@0: UnicodeString rules = entry->stringArg; michael@0: //ures_close(bundle); michael@0: michael@0: //if (U_FAILURE(status)) { michael@0: // We have a failure of some kind. Remove the ID from the michael@0: // registry so we don't keep trying. NOTE: This will throw off michael@0: // anyone who is, at the moment, trying to iterate over the michael@0: // available IDs. That's acceptable since we should never michael@0: // really get here except under installation, configuration, michael@0: // or unrecoverable run time memory failures. michael@0: // remove(ID); michael@0: //} else { michael@0: michael@0: // If the status indicates a failure, then we don't have any michael@0: // rules -- there is probably an installation error. The list michael@0: // in the root locale should correspond to all the installed michael@0: // transliterators; if it lists something that's not michael@0: // installed, we'll get an error from ResourceBundle. michael@0: aliasReturn = new TransliteratorAlias(ID, rules, michael@0: ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ? michael@0: UTRANS_REVERSE : UTRANS_FORWARD)); michael@0: if (aliasReturn == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: //} michael@0: } michael@0: return 0; michael@0: default: michael@0: U_ASSERT(FALSE); // can't get here michael@0: return 0; michael@0: } michael@0: } michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */ michael@0: michael@0: //eof