Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (c) 2001-2011, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | * Date Name Description |
michael@0 | 7 | * 08/10/2001 aliu Creation. |
michael@0 | 8 | ********************************************************************** |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include "unicode/utypes.h" |
michael@0 | 12 | |
michael@0 | 13 | #if !UCONFIG_NO_TRANSLITERATION |
michael@0 | 14 | |
michael@0 | 15 | #include "unicode/translit.h" |
michael@0 | 16 | #include "unicode/resbund.h" |
michael@0 | 17 | #include "unicode/uniset.h" |
michael@0 | 18 | #include "unicode/uscript.h" |
michael@0 | 19 | #include "rbt.h" |
michael@0 | 20 | #include "cpdtrans.h" |
michael@0 | 21 | #include "nultrans.h" |
michael@0 | 22 | #include "transreg.h" |
michael@0 | 23 | #include "rbt_data.h" |
michael@0 | 24 | #include "rbt_pars.h" |
michael@0 | 25 | #include "tridpars.h" |
michael@0 | 26 | #include "charstr.h" |
michael@0 | 27 | #include "uassert.h" |
michael@0 | 28 | #include "locutil.h" |
michael@0 | 29 | |
michael@0 | 30 | // Enable the following symbol to add debugging code that tracks the |
michael@0 | 31 | // allocation, deletion, and use of Entry objects. BoundsChecker has |
michael@0 | 32 | // reported dangling pointer errors with these objects, but I have |
michael@0 | 33 | // been unable to confirm them. I suspect BoundsChecker is getting |
michael@0 | 34 | // confused with pointers going into and coming out of a UHashtable, |
michael@0 | 35 | // despite the hinting code that is designed to help it. |
michael@0 | 36 | // #define DEBUG_MEM |
michael@0 | 37 | #ifdef DEBUG_MEM |
michael@0 | 38 | #include <stdio.h> |
michael@0 | 39 | #endif |
michael@0 | 40 | |
michael@0 | 41 | // UChar constants |
michael@0 | 42 | static const UChar LOCALE_SEP = 95; // '_' |
michael@0 | 43 | //static const UChar ID_SEP = 0x002D; /*-*/ |
michael@0 | 44 | //static const UChar VARIANT_SEP = 0x002F; // '/' |
michael@0 | 45 | |
michael@0 | 46 | // String constants |
michael@0 | 47 | static const UChar ANY[] = { 65, 110, 121, 0 }; // Any |
michael@0 | 48 | |
michael@0 | 49 | // empty string |
michael@0 | 50 | #define NO_VARIANT UnicodeString() |
michael@0 | 51 | |
michael@0 | 52 | /** |
michael@0 | 53 | * Resource bundle key for the RuleBasedTransliterator rule. |
michael@0 | 54 | */ |
michael@0 | 55 | //static const char RB_RULE[] = "Rule"; |
michael@0 | 56 | |
michael@0 | 57 | U_NAMESPACE_BEGIN |
michael@0 | 58 | |
michael@0 | 59 | //------------------------------------------------------------------ |
michael@0 | 60 | // Alias |
michael@0 | 61 | //------------------------------------------------------------------ |
michael@0 | 62 | |
michael@0 | 63 | TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID, |
michael@0 | 64 | const UnicodeSet* cpdFilter) : |
michael@0 | 65 | ID(), |
michael@0 | 66 | aliasesOrRules(theAliasID), |
michael@0 | 67 | transes(0), |
michael@0 | 68 | compoundFilter(cpdFilter), |
michael@0 | 69 | direction(UTRANS_FORWARD), |
michael@0 | 70 | type(TransliteratorAlias::SIMPLE) { |
michael@0 | 71 | } |
michael@0 | 72 | |
michael@0 | 73 | TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, |
michael@0 | 74 | const UnicodeString& idBlocks, |
michael@0 | 75 | UVector* adoptedTransliterators, |
michael@0 | 76 | const UnicodeSet* cpdFilter) : |
michael@0 | 77 | ID(theID), |
michael@0 | 78 | aliasesOrRules(idBlocks), |
michael@0 | 79 | transes(adoptedTransliterators), |
michael@0 | 80 | compoundFilter(cpdFilter), |
michael@0 | 81 | direction(UTRANS_FORWARD), |
michael@0 | 82 | type(TransliteratorAlias::COMPOUND) { |
michael@0 | 83 | } |
michael@0 | 84 | |
michael@0 | 85 | TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, |
michael@0 | 86 | const UnicodeString& rules, |
michael@0 | 87 | UTransDirection dir) : |
michael@0 | 88 | ID(theID), |
michael@0 | 89 | aliasesOrRules(rules), |
michael@0 | 90 | transes(0), |
michael@0 | 91 | compoundFilter(0), |
michael@0 | 92 | direction(dir), |
michael@0 | 93 | type(TransliteratorAlias::RULES) { |
michael@0 | 94 | } |
michael@0 | 95 | |
michael@0 | 96 | TransliteratorAlias::~TransliteratorAlias() { |
michael@0 | 97 | delete transes; |
michael@0 | 98 | } |
michael@0 | 99 | |
michael@0 | 100 | |
michael@0 | 101 | Transliterator* TransliteratorAlias::create(UParseError& pe, |
michael@0 | 102 | UErrorCode& ec) { |
michael@0 | 103 | if (U_FAILURE(ec)) { |
michael@0 | 104 | return 0; |
michael@0 | 105 | } |
michael@0 | 106 | Transliterator *t = NULL; |
michael@0 | 107 | switch (type) { |
michael@0 | 108 | case SIMPLE: |
michael@0 | 109 | t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec); |
michael@0 | 110 | if(U_FAILURE(ec)){ |
michael@0 | 111 | return 0; |
michael@0 | 112 | } |
michael@0 | 113 | if (compoundFilter != 0) |
michael@0 | 114 | t->adoptFilter((UnicodeSet*)compoundFilter->clone()); |
michael@0 | 115 | break; |
michael@0 | 116 | case COMPOUND: |
michael@0 | 117 | { |
michael@0 | 118 | // the total number of transliterators in the compound is the total number of anonymous transliterators |
michael@0 | 119 | // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID |
michael@0 | 120 | // block and that each pair anonymous transliterators has an ID block between them. Then we go back |
michael@0 | 121 | // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which |
michael@0 | 122 | // marks the position where an anonymous transliterator goes) and adjust accordingly |
michael@0 | 123 | int32_t anonymousRBTs = transes->size(); |
michael@0 | 124 | int32_t transCount = anonymousRBTs * 2 + 1; |
michael@0 | 125 | if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff)) |
michael@0 | 126 | --transCount; |
michael@0 | 127 | if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff)) |
michael@0 | 128 | --transCount; |
michael@0 | 129 | UnicodeString noIDBlock((UChar)(0xffff)); |
michael@0 | 130 | noIDBlock += ((UChar)(0xffff)); |
michael@0 | 131 | int32_t pos = aliasesOrRules.indexOf(noIDBlock); |
michael@0 | 132 | while (pos >= 0) { |
michael@0 | 133 | --transCount; |
michael@0 | 134 | pos = aliasesOrRules.indexOf(noIDBlock, pos + 1); |
michael@0 | 135 | } |
michael@0 | 136 | |
michael@0 | 137 | UVector transliterators(ec); |
michael@0 | 138 | UnicodeString idBlock; |
michael@0 | 139 | int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); |
michael@0 | 140 | while (blockSeparatorPos >= 0) { |
michael@0 | 141 | aliasesOrRules.extract(0, blockSeparatorPos, idBlock); |
michael@0 | 142 | aliasesOrRules.remove(0, blockSeparatorPos + 1); |
michael@0 | 143 | if (!idBlock.isEmpty()) |
michael@0 | 144 | transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec); |
michael@0 | 145 | if (!transes->isEmpty()) |
michael@0 | 146 | transliterators.addElement(transes->orphanElementAt(0), ec); |
michael@0 | 147 | blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); |
michael@0 | 148 | } |
michael@0 | 149 | if (!aliasesOrRules.isEmpty()) |
michael@0 | 150 | transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec); |
michael@0 | 151 | while (!transes->isEmpty()) |
michael@0 | 152 | transliterators.addElement(transes->orphanElementAt(0), ec); |
michael@0 | 153 | |
michael@0 | 154 | if (U_SUCCESS(ec)) { |
michael@0 | 155 | t = new CompoundTransliterator(ID, transliterators, |
michael@0 | 156 | (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0), |
michael@0 | 157 | anonymousRBTs, pe, ec); |
michael@0 | 158 | if (t == 0) { |
michael@0 | 159 | ec = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 160 | return 0; |
michael@0 | 161 | } |
michael@0 | 162 | } else { |
michael@0 | 163 | for (int32_t i = 0; i < transliterators.size(); i++) |
michael@0 | 164 | delete (Transliterator*)(transliterators.elementAt(i)); |
michael@0 | 165 | } |
michael@0 | 166 | } |
michael@0 | 167 | break; |
michael@0 | 168 | case RULES: |
michael@0 | 169 | U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE! |
michael@0 | 170 | break; |
michael@0 | 171 | } |
michael@0 | 172 | return t; |
michael@0 | 173 | } |
michael@0 | 174 | |
michael@0 | 175 | UBool TransliteratorAlias::isRuleBased() const { |
michael@0 | 176 | return type == RULES; |
michael@0 | 177 | } |
michael@0 | 178 | |
michael@0 | 179 | void TransliteratorAlias::parse(TransliteratorParser& parser, |
michael@0 | 180 | UParseError& pe, UErrorCode& ec) const { |
michael@0 | 181 | U_ASSERT(type == RULES); |
michael@0 | 182 | if (U_FAILURE(ec)) { |
michael@0 | 183 | return; |
michael@0 | 184 | } |
michael@0 | 185 | |
michael@0 | 186 | parser.parse(aliasesOrRules, direction, pe, ec); |
michael@0 | 187 | } |
michael@0 | 188 | |
michael@0 | 189 | //---------------------------------------------------------------------- |
michael@0 | 190 | // class TransliteratorSpec |
michael@0 | 191 | //---------------------------------------------------------------------- |
michael@0 | 192 | |
michael@0 | 193 | /** |
michael@0 | 194 | * A TransliteratorSpec is a string specifying either a source or a target. In more |
michael@0 | 195 | * general terms, it may also specify a variant, but we only use the |
michael@0 | 196 | * Spec class for sources and targets. |
michael@0 | 197 | * |
michael@0 | 198 | * A Spec may be a locale or a script. If it is a locale, it has a |
michael@0 | 199 | * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where |
michael@0 | 200 | * ssss is the script mapping of xx_YY_ZZZ. The Spec API methods |
michael@0 | 201 | * hasFallback(), next(), and reset() iterate over this fallback |
michael@0 | 202 | * sequence. |
michael@0 | 203 | * |
michael@0 | 204 | * The Spec class canonicalizes itself, so the locale is put into |
michael@0 | 205 | * canonical form, or the script is transformed from an abbreviation |
michael@0 | 206 | * to a full name. |
michael@0 | 207 | */ |
michael@0 | 208 | class TransliteratorSpec : public UMemory { |
michael@0 | 209 | public: |
michael@0 | 210 | TransliteratorSpec(const UnicodeString& spec); |
michael@0 | 211 | ~TransliteratorSpec(); |
michael@0 | 212 | |
michael@0 | 213 | const UnicodeString& get() const; |
michael@0 | 214 | UBool hasFallback() const; |
michael@0 | 215 | const UnicodeString& next(); |
michael@0 | 216 | void reset(); |
michael@0 | 217 | |
michael@0 | 218 | UBool isLocale() const; |
michael@0 | 219 | ResourceBundle& getBundle() const; |
michael@0 | 220 | |
michael@0 | 221 | operator const UnicodeString&() const { return get(); } |
michael@0 | 222 | const UnicodeString& getTop() const { return top; } |
michael@0 | 223 | |
michael@0 | 224 | private: |
michael@0 | 225 | void setupNext(); |
michael@0 | 226 | |
michael@0 | 227 | UnicodeString top; |
michael@0 | 228 | UnicodeString spec; |
michael@0 | 229 | UnicodeString nextSpec; |
michael@0 | 230 | UnicodeString scriptName; |
michael@0 | 231 | UBool isSpecLocale; // TRUE if spec is a locale |
michael@0 | 232 | UBool isNextLocale; // TRUE if nextSpec is a locale |
michael@0 | 233 | ResourceBundle* res; |
michael@0 | 234 | |
michael@0 | 235 | TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class |
michael@0 | 236 | TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class |
michael@0 | 237 | }; |
michael@0 | 238 | |
michael@0 | 239 | TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec) |
michael@0 | 240 | : top(theSpec), |
michael@0 | 241 | res(0) |
michael@0 | 242 | { |
michael@0 | 243 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 244 | Locale topLoc(""); |
michael@0 | 245 | LocaleUtility::initLocaleFromName(theSpec, topLoc); |
michael@0 | 246 | if (!topLoc.isBogus()) { |
michael@0 | 247 | res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status); |
michael@0 | 248 | /* test for NULL */ |
michael@0 | 249 | if (res == 0) { |
michael@0 | 250 | return; |
michael@0 | 251 | } |
michael@0 | 252 | if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { |
michael@0 | 253 | delete res; |
michael@0 | 254 | res = 0; |
michael@0 | 255 | } |
michael@0 | 256 | } |
michael@0 | 257 | |
michael@0 | 258 | // Canonicalize script name -or- do locale->script mapping |
michael@0 | 259 | status = U_ZERO_ERROR; |
michael@0 | 260 | static const int32_t capacity = 10; |
michael@0 | 261 | UScriptCode script[capacity]={USCRIPT_INVALID_CODE}; |
michael@0 | 262 | int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(), |
michael@0 | 263 | script, capacity, &status); |
michael@0 | 264 | if (num > 0 && script[0] != USCRIPT_INVALID_CODE) { |
michael@0 | 265 | scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV); |
michael@0 | 266 | } |
michael@0 | 267 | |
michael@0 | 268 | // Canonicalize top |
michael@0 | 269 | if (res != 0) { |
michael@0 | 270 | // Canonicalize locale name |
michael@0 | 271 | UnicodeString locStr; |
michael@0 | 272 | LocaleUtility::initNameFromLocale(topLoc, locStr); |
michael@0 | 273 | if (!locStr.isBogus()) { |
michael@0 | 274 | top = locStr; |
michael@0 | 275 | } |
michael@0 | 276 | } else if (scriptName.length() != 0) { |
michael@0 | 277 | // We are a script; use canonical name |
michael@0 | 278 | top = scriptName; |
michael@0 | 279 | } |
michael@0 | 280 | |
michael@0 | 281 | // assert(spec != top); |
michael@0 | 282 | reset(); |
michael@0 | 283 | } |
michael@0 | 284 | |
michael@0 | 285 | TransliteratorSpec::~TransliteratorSpec() { |
michael@0 | 286 | delete res; |
michael@0 | 287 | } |
michael@0 | 288 | |
michael@0 | 289 | UBool TransliteratorSpec::hasFallback() const { |
michael@0 | 290 | return nextSpec.length() != 0; |
michael@0 | 291 | } |
michael@0 | 292 | |
michael@0 | 293 | void TransliteratorSpec::reset() { |
michael@0 | 294 | if (spec != top) { |
michael@0 | 295 | spec = top; |
michael@0 | 296 | isSpecLocale = (res != 0); |
michael@0 | 297 | setupNext(); |
michael@0 | 298 | } |
michael@0 | 299 | } |
michael@0 | 300 | |
michael@0 | 301 | void TransliteratorSpec::setupNext() { |
michael@0 | 302 | isNextLocale = FALSE; |
michael@0 | 303 | if (isSpecLocale) { |
michael@0 | 304 | nextSpec = spec; |
michael@0 | 305 | int32_t i = nextSpec.lastIndexOf(LOCALE_SEP); |
michael@0 | 306 | // If i == 0 then we have _FOO, so we fall through |
michael@0 | 307 | // to the scriptName. |
michael@0 | 308 | if (i > 0) { |
michael@0 | 309 | nextSpec.truncate(i); |
michael@0 | 310 | isNextLocale = TRUE; |
michael@0 | 311 | } else { |
michael@0 | 312 | nextSpec = scriptName; // scriptName may be empty |
michael@0 | 313 | } |
michael@0 | 314 | } else { |
michael@0 | 315 | // spec is a script, so we are at the end |
michael@0 | 316 | nextSpec.truncate(0); |
michael@0 | 317 | } |
michael@0 | 318 | } |
michael@0 | 319 | |
michael@0 | 320 | // Protocol: |
michael@0 | 321 | // for(const UnicodeString& s(spec.get()); |
michael@0 | 322 | // spec.hasFallback(); s(spec.next())) { ... |
michael@0 | 323 | |
michael@0 | 324 | const UnicodeString& TransliteratorSpec::next() { |
michael@0 | 325 | spec = nextSpec; |
michael@0 | 326 | isSpecLocale = isNextLocale; |
michael@0 | 327 | setupNext(); |
michael@0 | 328 | return spec; |
michael@0 | 329 | } |
michael@0 | 330 | |
michael@0 | 331 | const UnicodeString& TransliteratorSpec::get() const { |
michael@0 | 332 | return spec; |
michael@0 | 333 | } |
michael@0 | 334 | |
michael@0 | 335 | UBool TransliteratorSpec::isLocale() const { |
michael@0 | 336 | return isSpecLocale; |
michael@0 | 337 | } |
michael@0 | 338 | |
michael@0 | 339 | ResourceBundle& TransliteratorSpec::getBundle() const { |
michael@0 | 340 | return *res; |
michael@0 | 341 | } |
michael@0 | 342 | |
michael@0 | 343 | //---------------------------------------------------------------------- |
michael@0 | 344 | |
#ifdef DEBUG_MEM

// Vector of Entry pointers currently in use
static UVector* DEBUG_entries = NULL;

// Lazily create the tracking vector.  It may remain NULL if the
// allocation fails; the helpers below tolerate that.
static void DEBUG_setup() {
    if (DEBUG_entries == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
        DEBUG_entries = new UVector(ec);
    }
}

// Caller must call DEBUG_setup first. Return index of given Entry,
// if it is in use (not deleted yet), or -1 if not found.
static int DEBUG_findEntry(TransliteratorEntry* e) {
    if (DEBUG_entries == NULL) {
        return -1;
    }
    for (int i=0; i<DEBUG_entries->size(); ++i) {
        if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
            return i;
        }
    }
    return -1;
}

// Track object creation
static void DEBUG_newEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    if (DEBUG_entries == NULL) {
        return; // tracking unavailable
    }
    if (DEBUG_findEntry(e) >= 0) {
        // This should really never happen unless the heap is broken
        // (%p with a void* is the portable way to print a pointer)
        printf("ERROR DEBUG_newEntry duplicate new pointer %p\n", (void*)e);
        return;
    }
    UErrorCode ec = U_ZERO_ERROR;
    DEBUG_entries->addElement(e, ec);
}

// Track object deletion
static void DEBUG_delEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_delEntry possible double deletion %p\n", (void*)e);
        return;
    }
    DEBUG_entries->removeElementAt(i);
}

// Track object usage
static void DEBUG_useEntry(TransliteratorEntry* e) {
    if (e == NULL) return;
    DEBUG_setup();
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_useEntry possible dangling pointer %p\n", (void*)e);
    }
}

#else
// If we're not debugging then make these macros into NOPs
#define DEBUG_newEntry(x)
#define DEBUG_delEntry(x)
#define DEBUG_useEntry(x)
#endif
michael@0 | 407 | |
michael@0 | 408 | //---------------------------------------------------------------------- |
michael@0 | 409 | // class Entry |
michael@0 | 410 | //---------------------------------------------------------------------- |
michael@0 | 411 | |
michael@0 | 412 | /** |
michael@0 | 413 | * The Entry object stores objects of different types and |
michael@0 | 414 | * singleton objects as placeholders for rule-based transliterators to |
michael@0 | 415 | * be built as needed. Instances of this struct can be placeholders, |
michael@0 | 416 | * can represent prototype transliterators to be cloned, or can |
michael@0 | 417 | * represent TransliteratorData objects. We don't support storing |
michael@0 | 418 | * classes in the registry because we don't have the rtti infrastructure |
michael@0 | 419 | * for it. We could easily add this if there is a need for it in the |
michael@0 | 420 | * future. |
michael@0 | 421 | */ |
michael@0 | 422 | class TransliteratorEntry : public UMemory { |
michael@0 | 423 | public: |
michael@0 | 424 | enum Type { |
michael@0 | 425 | RULES_FORWARD, |
michael@0 | 426 | RULES_REVERSE, |
michael@0 | 427 | LOCALE_RULES, |
michael@0 | 428 | PROTOTYPE, |
michael@0 | 429 | RBT_DATA, |
michael@0 | 430 | COMPOUND_RBT, |
michael@0 | 431 | ALIAS, |
michael@0 | 432 | FACTORY, |
michael@0 | 433 | NONE // Only used for uninitialized entries |
michael@0 | 434 | } entryType; |
michael@0 | 435 | // NOTE: stringArg cannot go inside the union because |
michael@0 | 436 | // it has a copy constructor |
michael@0 | 437 | UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT |
michael@0 | 438 | int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES |
michael@0 | 439 | UnicodeSet* compoundFilter; // For COMPOUND_RBT |
michael@0 | 440 | union { |
michael@0 | 441 | Transliterator* prototype; // For PROTOTYPE |
michael@0 | 442 | TransliterationRuleData* data; // For RBT_DATA |
michael@0 | 443 | UVector* dataVector; // For COMPOUND_RBT |
michael@0 | 444 | struct { |
michael@0 | 445 | Transliterator::Factory function; |
michael@0 | 446 | Transliterator::Token context; |
michael@0 | 447 | } factory; // For FACTORY |
michael@0 | 448 | } u; |
michael@0 | 449 | TransliteratorEntry(); |
michael@0 | 450 | ~TransliteratorEntry(); |
michael@0 | 451 | void adoptPrototype(Transliterator* adopted); |
michael@0 | 452 | void setFactory(Transliterator::Factory factory, |
michael@0 | 453 | Transliterator::Token context); |
michael@0 | 454 | |
michael@0 | 455 | private: |
michael@0 | 456 | |
michael@0 | 457 | TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class |
michael@0 | 458 | TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class |
michael@0 | 459 | }; |
michael@0 | 460 | |
michael@0 | 461 | TransliteratorEntry::TransliteratorEntry() { |
michael@0 | 462 | u.prototype = 0; |
michael@0 | 463 | compoundFilter = NULL; |
michael@0 | 464 | entryType = NONE; |
michael@0 | 465 | DEBUG_newEntry(this); |
michael@0 | 466 | } |
michael@0 | 467 | |
michael@0 | 468 | TransliteratorEntry::~TransliteratorEntry() { |
michael@0 | 469 | DEBUG_delEntry(this); |
michael@0 | 470 | if (entryType == PROTOTYPE) { |
michael@0 | 471 | delete u.prototype; |
michael@0 | 472 | } else if (entryType == RBT_DATA) { |
michael@0 | 473 | // The data object is shared between instances of RBT. The |
michael@0 | 474 | // entry object owns it. It should only be deleted when the |
michael@0 | 475 | // transliterator component is being cleaned up. Doing so |
michael@0 | 476 | // invalidates any RBTs that the user has instantiated. |
michael@0 | 477 | delete u.data; |
michael@0 | 478 | } else if (entryType == COMPOUND_RBT) { |
michael@0 | 479 | while (u.dataVector != NULL && !u.dataVector->isEmpty()) |
michael@0 | 480 | delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0); |
michael@0 | 481 | delete u.dataVector; |
michael@0 | 482 | } |
michael@0 | 483 | delete compoundFilter; |
michael@0 | 484 | } |
michael@0 | 485 | |
michael@0 | 486 | void TransliteratorEntry::adoptPrototype(Transliterator* adopted) { |
michael@0 | 487 | if (entryType == PROTOTYPE) { |
michael@0 | 488 | delete u.prototype; |
michael@0 | 489 | } |
michael@0 | 490 | entryType = PROTOTYPE; |
michael@0 | 491 | u.prototype = adopted; |
michael@0 | 492 | } |
michael@0 | 493 | |
michael@0 | 494 | void TransliteratorEntry::setFactory(Transliterator::Factory factory, |
michael@0 | 495 | Transliterator::Token context) { |
michael@0 | 496 | if (entryType == PROTOTYPE) { |
michael@0 | 497 | delete u.prototype; |
michael@0 | 498 | } |
michael@0 | 499 | entryType = FACTORY; |
michael@0 | 500 | u.factory.function = factory; |
michael@0 | 501 | u.factory.context = context; |
michael@0 | 502 | } |
michael@0 | 503 | |
michael@0 | 504 | // UObjectDeleter for Hashtable::setValueDeleter |
michael@0 | 505 | U_CDECL_BEGIN |
michael@0 | 506 | static void U_CALLCONV |
michael@0 | 507 | deleteEntry(void* obj) { |
michael@0 | 508 | delete (TransliteratorEntry*) obj; |
michael@0 | 509 | } |
michael@0 | 510 | U_CDECL_END |
michael@0 | 511 | |
michael@0 | 512 | //---------------------------------------------------------------------- |
michael@0 | 513 | // class TransliteratorRegistry: Basic public API |
michael@0 | 514 | //---------------------------------------------------------------------- |
michael@0 | 515 | |
michael@0 | 516 | TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : |
michael@0 | 517 | registry(TRUE, status), |
michael@0 | 518 | specDAG(TRUE, status), |
michael@0 | 519 | availableIDs(status) |
michael@0 | 520 | { |
michael@0 | 521 | registry.setValueDeleter(deleteEntry); |
michael@0 | 522 | availableIDs.setDeleter(uprv_deleteUObject); |
michael@0 | 523 | availableIDs.setComparer(uhash_compareCaselessUnicodeString); |
michael@0 | 524 | specDAG.setValueDeleter(uhash_deleteHashtable); |
michael@0 | 525 | } |
michael@0 | 526 | |
michael@0 | 527 | TransliteratorRegistry::~TransliteratorRegistry() { |
michael@0 | 528 | // Through the magic of C++, everything cleans itself up |
michael@0 | 529 | } |
michael@0 | 530 | |
michael@0 | 531 | Transliterator* TransliteratorRegistry::get(const UnicodeString& ID, |
michael@0 | 532 | TransliteratorAlias*& aliasReturn, |
michael@0 | 533 | UErrorCode& status) { |
michael@0 | 534 | U_ASSERT(aliasReturn == NULL); |
michael@0 | 535 | TransliteratorEntry *entry = find(ID); |
michael@0 | 536 | return (entry == 0) ? 0 |
michael@0 | 537 | : instantiateEntry(ID, entry, aliasReturn, status); |
michael@0 | 538 | } |
michael@0 | 539 | |
michael@0 | 540 | Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, |
michael@0 | 541 | TransliteratorParser& parser, |
michael@0 | 542 | TransliteratorAlias*& aliasReturn, |
michael@0 | 543 | UErrorCode& status) { |
michael@0 | 544 | U_ASSERT(aliasReturn == NULL); |
michael@0 | 545 | TransliteratorEntry *entry = find(ID); |
michael@0 | 546 | |
michael@0 | 547 | if (entry == 0) { |
michael@0 | 548 | // We get to this point if there are two threads, one of which |
michael@0 | 549 | // is instantiating an ID, and another of which is removing |
michael@0 | 550 | // the same ID from the registry, and the timing is just right. |
michael@0 | 551 | return 0; |
michael@0 | 552 | } |
michael@0 | 553 | |
michael@0 | 554 | // The usage model for the caller is that they will first call |
michael@0 | 555 | // reg->get() inside the mutex, they'll get back an alias, they call |
michael@0 | 556 | // alias->isRuleBased(), and if they get TRUE, they call alias->parse() |
michael@0 | 557 | // outside the mutex, then reg->reget() inside the mutex again. A real |
michael@0 | 558 | // mess, but it gets things working for ICU 3.0. [alan]. |
michael@0 | 559 | |
michael@0 | 560 | // Note: It's possible that in between the caller calling |
michael@0 | 561 | // alias->parse() and reg->reget(), that another thread will have |
michael@0 | 562 | // called reg->reget(), and the entry will already have been fixed up. |
michael@0 | 563 | // We have to detect this so we don't stomp over existing entry |
michael@0 | 564 | // data members and potentially leak memory (u.data and compoundFilter). |
michael@0 | 565 | |
michael@0 | 566 | if (entry->entryType == TransliteratorEntry::RULES_FORWARD || |
michael@0 | 567 | entry->entryType == TransliteratorEntry::RULES_REVERSE || |
michael@0 | 568 | entry->entryType == TransliteratorEntry::LOCALE_RULES) { |
michael@0 | 569 | |
michael@0 | 570 | if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) { |
michael@0 | 571 | entry->u.data = 0; |
michael@0 | 572 | entry->entryType = TransliteratorEntry::ALIAS; |
michael@0 | 573 | entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL"); |
michael@0 | 574 | } |
michael@0 | 575 | else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) { |
michael@0 | 576 | entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); |
michael@0 | 577 | entry->entryType = TransliteratorEntry::RBT_DATA; |
michael@0 | 578 | } |
michael@0 | 579 | else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) { |
michael@0 | 580 | entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0)); |
michael@0 | 581 | entry->compoundFilter = parser.orphanCompoundFilter(); |
michael@0 | 582 | entry->entryType = TransliteratorEntry::ALIAS; |
michael@0 | 583 | } |
michael@0 | 584 | else { |
michael@0 | 585 | entry->entryType = TransliteratorEntry::COMPOUND_RBT; |
michael@0 | 586 | entry->compoundFilter = parser.orphanCompoundFilter(); |
michael@0 | 587 | entry->u.dataVector = new UVector(status); |
michael@0 | 588 | entry->stringArg.remove(); |
michael@0 | 589 | |
michael@0 | 590 | int32_t limit = parser.idBlockVector.size(); |
michael@0 | 591 | if (parser.dataVector.size() > limit) |
michael@0 | 592 | limit = parser.dataVector.size(); |
michael@0 | 593 | |
michael@0 | 594 | for (int32_t i = 0; i < limit; i++) { |
michael@0 | 595 | if (i < parser.idBlockVector.size()) { |
michael@0 | 596 | UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); |
michael@0 | 597 | if (!idBlock->isEmpty()) |
michael@0 | 598 | entry->stringArg += *idBlock; |
michael@0 | 599 | } |
michael@0 | 600 | if (!parser.dataVector.isEmpty()) { |
michael@0 | 601 | TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); |
michael@0 | 602 | entry->u.dataVector->addElement(data, status); |
michael@0 | 603 | entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block |
michael@0 | 604 | } |
michael@0 | 605 | } |
michael@0 | 606 | } |
michael@0 | 607 | } |
michael@0 | 608 | |
michael@0 | 609 | Transliterator *t = |
michael@0 | 610 | instantiateEntry(ID, entry, aliasReturn, status); |
michael@0 | 611 | return t; |
michael@0 | 612 | } |
michael@0 | 613 | |
michael@0 | 614 | void TransliteratorRegistry::put(Transliterator* adoptedProto, |
michael@0 | 615 | UBool visible, |
michael@0 | 616 | UErrorCode& ec) |
michael@0 | 617 | { |
michael@0 | 618 | TransliteratorEntry *entry = new TransliteratorEntry(); |
michael@0 | 619 | if (entry == NULL) { |
michael@0 | 620 | ec = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 621 | return; |
michael@0 | 622 | } |
michael@0 | 623 | entry->adoptPrototype(adoptedProto); |
michael@0 | 624 | registerEntry(adoptedProto->getID(), entry, visible); |
michael@0 | 625 | } |
michael@0 | 626 | |
michael@0 | 627 | void TransliteratorRegistry::put(const UnicodeString& ID, |
michael@0 | 628 | Transliterator::Factory factory, |
michael@0 | 629 | Transliterator::Token context, |
michael@0 | 630 | UBool visible, |
michael@0 | 631 | UErrorCode& ec) { |
michael@0 | 632 | TransliteratorEntry *entry = new TransliteratorEntry(); |
michael@0 | 633 | if (entry == NULL) { |
michael@0 | 634 | ec = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 635 | return; |
michael@0 | 636 | } |
michael@0 | 637 | entry->setFactory(factory, context); |
michael@0 | 638 | registerEntry(ID, entry, visible); |
michael@0 | 639 | } |
michael@0 | 640 | |
michael@0 | 641 | void TransliteratorRegistry::put(const UnicodeString& ID, |
michael@0 | 642 | const UnicodeString& resourceName, |
michael@0 | 643 | UTransDirection dir, |
michael@0 | 644 | UBool readonlyResourceAlias, |
michael@0 | 645 | UBool visible, |
michael@0 | 646 | UErrorCode& ec) { |
michael@0 | 647 | TransliteratorEntry *entry = new TransliteratorEntry(); |
michael@0 | 648 | if (entry == NULL) { |
michael@0 | 649 | ec = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 650 | return; |
michael@0 | 651 | } |
michael@0 | 652 | entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD |
michael@0 | 653 | : TransliteratorEntry::RULES_REVERSE; |
michael@0 | 654 | if (readonlyResourceAlias) { |
michael@0 | 655 | entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1); |
michael@0 | 656 | } |
michael@0 | 657 | else { |
michael@0 | 658 | entry->stringArg = resourceName; |
michael@0 | 659 | } |
michael@0 | 660 | registerEntry(ID, entry, visible); |
michael@0 | 661 | } |
michael@0 | 662 | |
michael@0 | 663 | void TransliteratorRegistry::put(const UnicodeString& ID, |
michael@0 | 664 | const UnicodeString& alias, |
michael@0 | 665 | UBool readonlyAliasAlias, |
michael@0 | 666 | UBool visible, |
michael@0 | 667 | UErrorCode& /*ec*/) { |
michael@0 | 668 | TransliteratorEntry *entry = new TransliteratorEntry(); |
michael@0 | 669 | // Null pointer check |
michael@0 | 670 | if (entry != NULL) { |
michael@0 | 671 | entry->entryType = TransliteratorEntry::ALIAS; |
michael@0 | 672 | if (readonlyAliasAlias) { |
michael@0 | 673 | entry->stringArg.setTo(TRUE, alias.getBuffer(), -1); |
michael@0 | 674 | } |
michael@0 | 675 | else { |
michael@0 | 676 | entry->stringArg = alias; |
michael@0 | 677 | } |
michael@0 | 678 | registerEntry(ID, entry, visible); |
michael@0 | 679 | } |
michael@0 | 680 | } |
michael@0 | 681 | |
michael@0 | 682 | void TransliteratorRegistry::remove(const UnicodeString& ID) { |
michael@0 | 683 | UnicodeString source, target, variant; |
michael@0 | 684 | UBool sawSource; |
michael@0 | 685 | TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); |
michael@0 | 686 | // Only need to do this if ID.indexOf('-') < 0 |
michael@0 | 687 | UnicodeString id; |
michael@0 | 688 | TransliteratorIDParser::STVtoID(source, target, variant, id); |
michael@0 | 689 | registry.remove(id); |
michael@0 | 690 | removeSTV(source, target, variant); |
michael@0 | 691 | availableIDs.removeElement((void*) &id); |
michael@0 | 692 | } |
michael@0 | 693 | |
michael@0 | 694 | //---------------------------------------------------------------------- |
michael@0 | 695 | // class TransliteratorRegistry: Public ID and spec management |
michael@0 | 696 | //---------------------------------------------------------------------- |
michael@0 | 697 | |
michael@0 | 698 | /** |
michael@0 | 699 | * == OBSOLETE - remove in ICU 3.4 == |
michael@0 | 700 | * Return the number of IDs currently registered with the system. |
michael@0 | 701 | * To retrieve the actual IDs, call getAvailableID(i) with |
michael@0 | 702 | * i from 0 to countAvailableIDs() - 1. |
michael@0 | 703 | */ |
michael@0 | 704 | int32_t TransliteratorRegistry::countAvailableIDs(void) const { |
michael@0 | 705 | return availableIDs.size(); |
michael@0 | 706 | } |
michael@0 | 707 | |
michael@0 | 708 | /** |
michael@0 | 709 | * == OBSOLETE - remove in ICU 3.4 == |
michael@0 | 710 | * Return the index-th available ID. index must be between 0 |
michael@0 | 711 | * and countAvailableIDs() - 1, inclusive. If index is out of |
michael@0 | 712 | * range, the result of getAvailableID(0) is returned. |
michael@0 | 713 | */ |
michael@0 | 714 | const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const { |
michael@0 | 715 | if (index < 0 || index >= availableIDs.size()) { |
michael@0 | 716 | index = 0; |
michael@0 | 717 | } |
michael@0 | 718 | return *(const UnicodeString*) availableIDs[index]; |
michael@0 | 719 | } |
michael@0 | 720 | |
michael@0 | 721 | StringEnumeration* TransliteratorRegistry::getAvailableIDs() const { |
michael@0 | 722 | return new Enumeration(*this); |
michael@0 | 723 | } |
michael@0 | 724 | |
michael@0 | 725 | int32_t TransliteratorRegistry::countAvailableSources(void) const { |
michael@0 | 726 | return specDAG.count(); |
michael@0 | 727 | } |
michael@0 | 728 | |
michael@0 | 729 | UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index, |
michael@0 | 730 | UnicodeString& result) const { |
michael@0 | 731 | int32_t pos = -1; |
michael@0 | 732 | const UHashElement *e = 0; |
michael@0 | 733 | while (index-- >= 0) { |
michael@0 | 734 | e = specDAG.nextElement(pos); |
michael@0 | 735 | if (e == 0) { |
michael@0 | 736 | break; |
michael@0 | 737 | } |
michael@0 | 738 | } |
michael@0 | 739 | if (e == 0) { |
michael@0 | 740 | result.truncate(0); |
michael@0 | 741 | } else { |
michael@0 | 742 | result = *(UnicodeString*) e->key.pointer; |
michael@0 | 743 | } |
michael@0 | 744 | return result; |
michael@0 | 745 | } |
michael@0 | 746 | |
michael@0 | 747 | int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const { |
michael@0 | 748 | Hashtable *targets = (Hashtable*) specDAG.get(source); |
michael@0 | 749 | return (targets == 0) ? 0 : targets->count(); |
michael@0 | 750 | } |
michael@0 | 751 | |
michael@0 | 752 | UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index, |
michael@0 | 753 | const UnicodeString& source, |
michael@0 | 754 | UnicodeString& result) const { |
michael@0 | 755 | Hashtable *targets = (Hashtable*) specDAG.get(source); |
michael@0 | 756 | if (targets == 0) { |
michael@0 | 757 | result.truncate(0); // invalid source |
michael@0 | 758 | return result; |
michael@0 | 759 | } |
michael@0 | 760 | int32_t pos = -1; |
michael@0 | 761 | const UHashElement *e = 0; |
michael@0 | 762 | while (index-- >= 0) { |
michael@0 | 763 | e = targets->nextElement(pos); |
michael@0 | 764 | if (e == 0) { |
michael@0 | 765 | break; |
michael@0 | 766 | } |
michael@0 | 767 | } |
michael@0 | 768 | if (e == 0) { |
michael@0 | 769 | result.truncate(0); // invalid index |
michael@0 | 770 | } else { |
michael@0 | 771 | result = *(UnicodeString*) e->key.pointer; |
michael@0 | 772 | } |
michael@0 | 773 | return result; |
michael@0 | 774 | } |
michael@0 | 775 | |
michael@0 | 776 | int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source, |
michael@0 | 777 | const UnicodeString& target) const { |
michael@0 | 778 | Hashtable *targets = (Hashtable*) specDAG.get(source); |
michael@0 | 779 | if (targets == 0) { |
michael@0 | 780 | return 0; |
michael@0 | 781 | } |
michael@0 | 782 | UVector *variants = (UVector*) targets->get(target); |
michael@0 | 783 | // variants may be 0 if the source/target are invalid |
michael@0 | 784 | return (variants == 0) ? 0 : variants->size(); |
michael@0 | 785 | } |
michael@0 | 786 | |
michael@0 | 787 | UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index, |
michael@0 | 788 | const UnicodeString& source, |
michael@0 | 789 | const UnicodeString& target, |
michael@0 | 790 | UnicodeString& result) const { |
michael@0 | 791 | Hashtable *targets = (Hashtable*) specDAG.get(source); |
michael@0 | 792 | if (targets == 0) { |
michael@0 | 793 | result.truncate(0); // invalid source |
michael@0 | 794 | return result; |
michael@0 | 795 | } |
michael@0 | 796 | UVector *variants = (UVector*) targets->get(target); |
michael@0 | 797 | if (variants == 0) { |
michael@0 | 798 | result.truncate(0); // invalid target |
michael@0 | 799 | return result; |
michael@0 | 800 | } |
michael@0 | 801 | UnicodeString *v = (UnicodeString*) variants->elementAt(index); |
michael@0 | 802 | if (v == 0) { |
michael@0 | 803 | result.truncate(0); // invalid index |
michael@0 | 804 | } else { |
michael@0 | 805 | result = *v; |
michael@0 | 806 | } |
michael@0 | 807 | return result; |
michael@0 | 808 | } |
michael@0 | 809 | |
michael@0 | 810 | //---------------------------------------------------------------------- |
michael@0 | 811 | // class TransliteratorRegistry::Enumeration |
michael@0 | 812 | //---------------------------------------------------------------------- |
michael@0 | 813 | |
michael@0 | 814 | TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) : |
michael@0 | 815 | index(0), reg(_reg) { |
michael@0 | 816 | } |
michael@0 | 817 | |
michael@0 | 818 | TransliteratorRegistry::Enumeration::~Enumeration() { |
michael@0 | 819 | } |
michael@0 | 820 | |
michael@0 | 821 | int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const { |
michael@0 | 822 | return reg.availableIDs.size(); |
michael@0 | 823 | } |
michael@0 | 824 | |
michael@0 | 825 | const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) { |
michael@0 | 826 | // This is sloppy but safe -- if we get out of sync with the underlying |
michael@0 | 827 | // registry, we will still return legal strings, but they might not |
michael@0 | 828 | // correspond to the snapshot at construction time. So there could be |
michael@0 | 829 | // duplicate IDs or omitted IDs if insertions or deletions occur in one |
michael@0 | 830 | // thread while another is iterating. To be more rigorous, add a timestamp, |
michael@0 | 831 | // which is incremented with any modification, and validate this iterator |
michael@0 | 832 | // against the timestamp at construction time. This probably isn't worth |
michael@0 | 833 | // doing as long as there is some possibility of removing this code in favor |
michael@0 | 834 | // of some new code based on Doug's service framework. |
michael@0 | 835 | if (U_FAILURE(status)) { |
michael@0 | 836 | return NULL; |
michael@0 | 837 | } |
michael@0 | 838 | int32_t n = reg.availableIDs.size(); |
michael@0 | 839 | if (index > n) { |
michael@0 | 840 | status = U_ENUM_OUT_OF_SYNC_ERROR; |
michael@0 | 841 | } |
michael@0 | 842 | // index == n is okay -- this means we've reached the end |
michael@0 | 843 | if (index < n) { |
michael@0 | 844 | // Copy the string! This avoids lifetime problems. |
michael@0 | 845 | unistr = *(const UnicodeString*)reg.availableIDs[index++]; |
michael@0 | 846 | return &unistr; |
michael@0 | 847 | } else { |
michael@0 | 848 | return NULL; |
michael@0 | 849 | } |
michael@0 | 850 | } |
michael@0 | 851 | |
michael@0 | 852 | void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) { |
michael@0 | 853 | index = 0; |
michael@0 | 854 | } |
michael@0 | 855 | |
michael@0 | 856 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration) |
michael@0 | 857 | |
michael@0 | 858 | //---------------------------------------------------------------------- |
michael@0 | 859 | // class TransliteratorRegistry: internal |
michael@0 | 860 | //---------------------------------------------------------------------- |
michael@0 | 861 | |
michael@0 | 862 | /** |
michael@0 | 863 | * Convenience method. Calls 6-arg registerEntry(). |
michael@0 | 864 | */ |
michael@0 | 865 | void TransliteratorRegistry::registerEntry(const UnicodeString& source, |
michael@0 | 866 | const UnicodeString& target, |
michael@0 | 867 | const UnicodeString& variant, |
michael@0 | 868 | TransliteratorEntry* adopted, |
michael@0 | 869 | UBool visible) { |
michael@0 | 870 | UnicodeString ID; |
michael@0 | 871 | UnicodeString s(source); |
michael@0 | 872 | if (s.length() == 0) { |
michael@0 | 873 | s.setTo(TRUE, ANY, 3); |
michael@0 | 874 | } |
michael@0 | 875 | TransliteratorIDParser::STVtoID(source, target, variant, ID); |
michael@0 | 876 | registerEntry(ID, s, target, variant, adopted, visible); |
michael@0 | 877 | } |
michael@0 | 878 | |
michael@0 | 879 | /** |
michael@0 | 880 | * Convenience method. Calls 6-arg registerEntry(). |
michael@0 | 881 | */ |
michael@0 | 882 | void TransliteratorRegistry::registerEntry(const UnicodeString& ID, |
michael@0 | 883 | TransliteratorEntry* adopted, |
michael@0 | 884 | UBool visible) { |
michael@0 | 885 | UnicodeString source, target, variant; |
michael@0 | 886 | UBool sawSource; |
michael@0 | 887 | TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); |
michael@0 | 888 | // Only need to do this if ID.indexOf('-') < 0 |
michael@0 | 889 | UnicodeString id; |
michael@0 | 890 | TransliteratorIDParser::STVtoID(source, target, variant, id); |
michael@0 | 891 | registerEntry(id, source, target, variant, adopted, visible); |
michael@0 | 892 | } |
michael@0 | 893 | |
michael@0 | 894 | /** |
michael@0 | 895 | * Register an entry object (adopted) with the given ID, source, |
michael@0 | 896 | * target, and variant strings. |
michael@0 | 897 | */ |
michael@0 | 898 | void TransliteratorRegistry::registerEntry(const UnicodeString& ID, |
michael@0 | 899 | const UnicodeString& source, |
michael@0 | 900 | const UnicodeString& target, |
michael@0 | 901 | const UnicodeString& variant, |
michael@0 | 902 | TransliteratorEntry* adopted, |
michael@0 | 903 | UBool visible) { |
michael@0 | 904 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 905 | registry.put(ID, adopted, status); |
michael@0 | 906 | if (visible) { |
michael@0 | 907 | registerSTV(source, target, variant); |
michael@0 | 908 | if (!availableIDs.contains((void*) &ID)) { |
michael@0 | 909 | UnicodeString *newID = (UnicodeString *)ID.clone(); |
michael@0 | 910 | // Check to make sure newID was created. |
michael@0 | 911 | if (newID != NULL) { |
michael@0 | 912 | // NUL-terminate the ID string |
michael@0 | 913 | newID->getTerminatedBuffer(); |
michael@0 | 914 | availableIDs.addElement(newID, status); |
michael@0 | 915 | } |
michael@0 | 916 | } |
michael@0 | 917 | } else { |
michael@0 | 918 | removeSTV(source, target, variant); |
michael@0 | 919 | availableIDs.removeElement((void*) &ID); |
michael@0 | 920 | } |
michael@0 | 921 | } |
michael@0 | 922 | |
michael@0 | 923 | /** |
michael@0 | 924 | * Register a source-target/variant in the specDAG. Variant may be |
michael@0 | 925 | * empty, but source and target must not be. If variant is empty then |
michael@0 | 926 | * the special variant NO_VARIANT is stored in slot zero of the |
michael@0 | 927 | * UVector of variants. |
michael@0 | 928 | */ |
michael@0 | 929 | void TransliteratorRegistry::registerSTV(const UnicodeString& source, |
michael@0 | 930 | const UnicodeString& target, |
michael@0 | 931 | const UnicodeString& variant) { |
michael@0 | 932 | // assert(source.length() > 0); |
michael@0 | 933 | // assert(target.length() > 0); |
michael@0 | 934 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 935 | Hashtable *targets = (Hashtable*) specDAG.get(source); |
michael@0 | 936 | if (targets == 0) { |
michael@0 | 937 | targets = new Hashtable(TRUE, status); |
michael@0 | 938 | if (U_FAILURE(status) || targets == 0) { |
michael@0 | 939 | return; |
michael@0 | 940 | } |
michael@0 | 941 | targets->setValueDeleter(uprv_deleteUObject); |
michael@0 | 942 | specDAG.put(source, targets, status); |
michael@0 | 943 | } |
michael@0 | 944 | UVector *variants = (UVector*) targets->get(target); |
michael@0 | 945 | if (variants == 0) { |
michael@0 | 946 | variants = new UVector(uprv_deleteUObject, |
michael@0 | 947 | uhash_compareCaselessUnicodeString, status); |
michael@0 | 948 | if (variants == 0) { |
michael@0 | 949 | return; |
michael@0 | 950 | } |
michael@0 | 951 | targets->put(target, variants, status); |
michael@0 | 952 | } |
michael@0 | 953 | // assert(NO_VARIANT == ""); |
michael@0 | 954 | // We add the variant string. If it is the special "no variant" |
michael@0 | 955 | // string, that is, the empty string, we add it at position zero. |
michael@0 | 956 | if (!variants->contains((void*) &variant)) { |
michael@0 | 957 | UnicodeString *tempus; // Used for null pointer check. |
michael@0 | 958 | if (variant.length() > 0) { |
michael@0 | 959 | tempus = new UnicodeString(variant); |
michael@0 | 960 | if (tempus != NULL) { |
michael@0 | 961 | variants->addElement(tempus, status); |
michael@0 | 962 | } |
michael@0 | 963 | } else { |
michael@0 | 964 | tempus = new UnicodeString(); // = NO_VARIANT |
michael@0 | 965 | if (tempus != NULL) { |
michael@0 | 966 | variants->insertElementAt(tempus, 0, status); |
michael@0 | 967 | } |
michael@0 | 968 | } |
michael@0 | 969 | } |
michael@0 | 970 | } |
michael@0 | 971 | |
michael@0 | 972 | /** |
michael@0 | 973 | * Remove a source-target/variant from the specDAG. |
michael@0 | 974 | */ |
michael@0 | 975 | void TransliteratorRegistry::removeSTV(const UnicodeString& source, |
michael@0 | 976 | const UnicodeString& target, |
michael@0 | 977 | const UnicodeString& variant) { |
michael@0 | 978 | // assert(source.length() > 0); |
michael@0 | 979 | // assert(target.length() > 0); |
michael@0 | 980 | // UErrorCode status = U_ZERO_ERROR; |
michael@0 | 981 | Hashtable *targets = (Hashtable*) specDAG.get(source); |
michael@0 | 982 | if (targets == 0) { |
michael@0 | 983 | return; // should never happen for valid s-t/v |
michael@0 | 984 | } |
michael@0 | 985 | UVector *variants = (UVector*) targets->get(target); |
michael@0 | 986 | if (variants == 0) { |
michael@0 | 987 | return; // should never happen for valid s-t/v |
michael@0 | 988 | } |
michael@0 | 989 | variants->removeElement((void*) &variant); |
michael@0 | 990 | if (variants->size() == 0) { |
michael@0 | 991 | targets->remove(target); // should delete variants |
michael@0 | 992 | if (targets->count() == 0) { |
michael@0 | 993 | specDAG.remove(source); // should delete targets |
michael@0 | 994 | } |
michael@0 | 995 | } |
michael@0 | 996 | } |
michael@0 | 997 | |
michael@0 | 998 | /** |
michael@0 | 999 | * Attempt to find a source-target/variant in the dynamic registry |
michael@0 | 1000 | * store. Return 0 on failure. |
michael@0 | 1001 | * |
michael@0 | 1002 | * Caller does NOT own returned object. |
michael@0 | 1003 | */ |
michael@0 | 1004 | TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src, |
michael@0 | 1005 | const TransliteratorSpec& trg, |
michael@0 | 1006 | const UnicodeString& variant) const { |
michael@0 | 1007 | UnicodeString ID; |
michael@0 | 1008 | TransliteratorIDParser::STVtoID(src, trg, variant, ID); |
michael@0 | 1009 | TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID); |
michael@0 | 1010 | DEBUG_useEntry(e); |
michael@0 | 1011 | return e; |
michael@0 | 1012 | } |
michael@0 | 1013 | |
michael@0 | 1014 | /** |
michael@0 | 1015 | * Attempt to find a source-target/variant in the static locale |
michael@0 | 1016 | * resource store. Do not perform fallback. Return 0 on failure. |
michael@0 | 1017 | * |
michael@0 | 1018 | * On success, create a new entry object, register it in the dynamic |
michael@0 | 1019 | * store, and return a pointer to it, but do not make it public -- |
michael@0 | 1020 | * just because someone requested something, we do not expand the |
michael@0 | 1021 | * available ID list (or spec DAG). |
michael@0 | 1022 | * |
michael@0 | 1023 | * Caller does NOT own returned object. |
michael@0 | 1024 | */ |
michael@0 | 1025 | TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src, |
michael@0 | 1026 | const TransliteratorSpec& trg, |
michael@0 | 1027 | const UnicodeString& variant) { |
michael@0 | 1028 | TransliteratorEntry* entry = 0; |
michael@0 | 1029 | if (src.isLocale()) { |
michael@0 | 1030 | entry = findInBundle(src, trg, variant, UTRANS_FORWARD); |
michael@0 | 1031 | } else if (trg.isLocale()) { |
michael@0 | 1032 | entry = findInBundle(trg, src, variant, UTRANS_REVERSE); |
michael@0 | 1033 | } |
michael@0 | 1034 | |
michael@0 | 1035 | // If we found an entry, store it in the Hashtable for next |
michael@0 | 1036 | // time. |
michael@0 | 1037 | if (entry != 0) { |
michael@0 | 1038 | registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE); |
michael@0 | 1039 | } |
michael@0 | 1040 | |
michael@0 | 1041 | return entry; |
michael@0 | 1042 | } |
michael@0 | 1043 | |
michael@0 | 1044 | // As of 2.0, resource bundle keys cannot contain '_' |
michael@0 | 1045 | static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo" |
michael@0 | 1046 | |
michael@0 | 1047 | static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom" |
michael@0 | 1048 | |
michael@0 | 1049 | static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate" |
michael@0 | 1050 | |
michael@0 | 1051 | /** |
michael@0 | 1052 | * Attempt to find an entry in a single resource bundle. This is |
michael@0 | 1053 | * a one-sided lookup. findInStaticStore() performs up to two such |
michael@0 | 1054 | * lookups, one for the source, and one for the target. |
michael@0 | 1055 | * |
michael@0 | 1056 | * Do not perform fallback. Return 0 on failure. |
michael@0 | 1057 | * |
michael@0 | 1058 | * On success, create a new Entry object, populate it, and return it. |
michael@0 | 1059 | * The caller owns the returned object. |
michael@0 | 1060 | */ |
michael@0 | 1061 | TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen, |
michael@0 | 1062 | const TransliteratorSpec& specToFind, |
michael@0 | 1063 | const UnicodeString& variant, |
michael@0 | 1064 | UTransDirection direction) |
michael@0 | 1065 | { |
michael@0 | 1066 | UnicodeString utag; |
michael@0 | 1067 | UnicodeString resStr; |
michael@0 | 1068 | int32_t pass; |
michael@0 | 1069 | |
michael@0 | 1070 | for (pass=0; pass<2; ++pass) { |
michael@0 | 1071 | utag.truncate(0); |
michael@0 | 1072 | // First try either TransliteratorTo_xxx or |
michael@0 | 1073 | // TransliterateFrom_xxx, then try the bidirectional |
michael@0 | 1074 | // Transliterate_xxx. This precedence order is arbitrary |
michael@0 | 1075 | // but must be consistent and documented. |
michael@0 | 1076 | if (pass == 0) { |
michael@0 | 1077 | utag.append(direction == UTRANS_FORWARD ? |
michael@0 | 1078 | TRANSLITERATE_TO : TRANSLITERATE_FROM, -1); |
michael@0 | 1079 | } else { |
michael@0 | 1080 | utag.append(TRANSLITERATE, -1); |
michael@0 | 1081 | } |
michael@0 | 1082 | UnicodeString s(specToFind.get()); |
michael@0 | 1083 | utag.append(s.toUpper("")); |
michael@0 | 1084 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 1085 | ResourceBundle subres(specToOpen.getBundle().get( |
michael@0 | 1086 | CharString().appendInvariantChars(utag, status).data(), status)); |
michael@0 | 1087 | if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { |
michael@0 | 1088 | continue; |
michael@0 | 1089 | } |
michael@0 | 1090 | |
michael@0 | 1091 | s.truncate(0); |
michael@0 | 1092 | if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) { |
michael@0 | 1093 | continue; |
michael@0 | 1094 | } |
michael@0 | 1095 | |
michael@0 | 1096 | if (variant.length() != 0) { |
michael@0 | 1097 | status = U_ZERO_ERROR; |
michael@0 | 1098 | resStr = subres.getStringEx( |
michael@0 | 1099 | CharString().appendInvariantChars(variant, status).data(), status); |
michael@0 | 1100 | if (U_SUCCESS(status)) { |
michael@0 | 1101 | // Exit loop successfully |
michael@0 | 1102 | break; |
michael@0 | 1103 | } |
michael@0 | 1104 | } else { |
michael@0 | 1105 | // Variant is empty, which means match the first variant listed. |
michael@0 | 1106 | status = U_ZERO_ERROR; |
michael@0 | 1107 | resStr = subres.getStringEx(1, status); |
michael@0 | 1108 | if (U_SUCCESS(status)) { |
michael@0 | 1109 | // Exit loop successfully |
michael@0 | 1110 | break; |
michael@0 | 1111 | } |
michael@0 | 1112 | } |
michael@0 | 1113 | } |
michael@0 | 1114 | |
michael@0 | 1115 | if (pass==2) { |
michael@0 | 1116 | // Failed |
michael@0 | 1117 | return NULL; |
michael@0 | 1118 | } |
michael@0 | 1119 | |
michael@0 | 1120 | // We have succeeded in loading a string from the locale |
michael@0 | 1121 | // resources. Create a new registry entry to hold it and return it. |
michael@0 | 1122 | TransliteratorEntry *entry = new TransliteratorEntry(); |
michael@0 | 1123 | if (entry != 0) { |
michael@0 | 1124 | // The direction is always forward for the |
michael@0 | 1125 | // TransliterateTo_xxx and TransliterateFrom_xxx |
michael@0 | 1126 | // items; those are unidirectional forward rules. |
michael@0 | 1127 | // For the bidirectional Transliterate_xxx items, |
michael@0 | 1128 | // the direction is the value passed in to this |
michael@0 | 1129 | // function. |
michael@0 | 1130 | int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction; |
michael@0 | 1131 | entry->entryType = TransliteratorEntry::LOCALE_RULES; |
michael@0 | 1132 | entry->stringArg = resStr; |
michael@0 | 1133 | entry->intArg = dir; |
michael@0 | 1134 | } |
michael@0 | 1135 | |
michael@0 | 1136 | return entry; |
michael@0 | 1137 | } |
michael@0 | 1138 | |
michael@0 | 1139 | /** |
michael@0 | 1140 | * Convenience method. Calls 3-arg find(). |
michael@0 | 1141 | */ |
michael@0 | 1142 | TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) { |
michael@0 | 1143 | UnicodeString source, target, variant; |
michael@0 | 1144 | UBool sawSource; |
michael@0 | 1145 | TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); |
michael@0 | 1146 | return find(source, target, variant); |
michael@0 | 1147 | } |
michael@0 | 1148 | |
michael@0 | 1149 | /** |
michael@0 | 1150 | * Top-level find method. Attempt to find a source-target/variant in |
michael@0 | 1151 | * either the dynamic or the static (locale resource) store. Perform |
michael@0 | 1152 | * fallback. |
michael@0 | 1153 | * |
michael@0 | 1154 | * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v: |
michael@0 | 1155 | * |
michael@0 | 1156 | * ss_SS_SSS-tt_TT_TTT/v -- in hashtable |
michael@0 | 1157 | * ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback) |
michael@0 | 1158 | * |
michael@0 | 1159 | * repeat with t = tt_TT_TTT, tt_TT, tt, and tscript |
michael@0 | 1160 | * |
michael@0 | 1161 | * ss_SS_SSS-t/ * |
michael@0 | 1162 | * ss_SS-t/ * |
michael@0 | 1163 | * ss-t/ * |
michael@0 | 1164 | * sscript-t/ * |
michael@0 | 1165 | * |
michael@0 | 1166 | * Here * matches the first variant listed. |
michael@0 | 1167 | * |
michael@0 | 1168 | * Caller does NOT own returned object. Return 0 on failure. |
michael@0 | 1169 | */ |
michael@0 | 1170 | TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source, |
michael@0 | 1171 | UnicodeString& target, |
michael@0 | 1172 | UnicodeString& variant) { |
michael@0 | 1173 | |
michael@0 | 1174 | TransliteratorSpec src(source); |
michael@0 | 1175 | TransliteratorSpec trg(target); |
michael@0 | 1176 | TransliteratorEntry* entry; |
michael@0 | 1177 | |
michael@0 | 1178 | // Seek exact match in hashtable. Temporary fix for ICU 4.6. |
michael@0 | 1179 | // TODO: The general logic for finding a matching transliterator needs to be reviewed. |
michael@0 | 1180 | // ICU ticket #8089 |
michael@0 | 1181 | UnicodeString ID; |
michael@0 | 1182 | TransliteratorIDParser::STVtoID(source, target, variant, ID); |
michael@0 | 1183 | entry = (TransliteratorEntry*) registry.get(ID); |
michael@0 | 1184 | if (entry != 0) { |
michael@0 | 1185 | // std::string ss; |
michael@0 | 1186 | // std::cout << ID.toUTF8String(ss) << std::endl; |
michael@0 | 1187 | return entry; |
michael@0 | 1188 | } |
michael@0 | 1189 | |
michael@0 | 1190 | if (variant.length() != 0) { |
michael@0 | 1191 | |
michael@0 | 1192 | // Seek exact match in hashtable |
michael@0 | 1193 | entry = findInDynamicStore(src, trg, variant); |
michael@0 | 1194 | if (entry != 0) { |
michael@0 | 1195 | return entry; |
michael@0 | 1196 | } |
michael@0 | 1197 | |
michael@0 | 1198 | // Seek exact match in locale resources |
michael@0 | 1199 | entry = findInStaticStore(src, trg, variant); |
michael@0 | 1200 | if (entry != 0) { |
michael@0 | 1201 | return entry; |
michael@0 | 1202 | } |
michael@0 | 1203 | } |
michael@0 | 1204 | |
michael@0 | 1205 | for (;;) { |
michael@0 | 1206 | src.reset(); |
michael@0 | 1207 | for (;;) { |
michael@0 | 1208 | // Seek match in hashtable |
michael@0 | 1209 | entry = findInDynamicStore(src, trg, NO_VARIANT); |
michael@0 | 1210 | if (entry != 0) { |
michael@0 | 1211 | return entry; |
michael@0 | 1212 | } |
michael@0 | 1213 | |
michael@0 | 1214 | // Seek match in locale resources |
michael@0 | 1215 | entry = findInStaticStore(src, trg, NO_VARIANT); |
michael@0 | 1216 | if (entry != 0) { |
michael@0 | 1217 | return entry; |
michael@0 | 1218 | } |
michael@0 | 1219 | if (!src.hasFallback()) { |
michael@0 | 1220 | break; |
michael@0 | 1221 | } |
michael@0 | 1222 | src.next(); |
michael@0 | 1223 | } |
michael@0 | 1224 | if (!trg.hasFallback()) { |
michael@0 | 1225 | break; |
michael@0 | 1226 | } |
michael@0 | 1227 | trg.next(); |
michael@0 | 1228 | } |
michael@0 | 1229 | |
michael@0 | 1230 | return 0; |
michael@0 | 1231 | } |
michael@0 | 1232 | |
michael@0 | 1233 | /** |
michael@0 | 1234 | * Given an Entry object, instantiate it. Caller owns result. Return |
michael@0 | 1235 | * 0 on failure. |
michael@0 | 1236 | * |
michael@0 | 1237 | * Return a non-empty aliasReturn value if the ID points to an alias. |
michael@0 | 1238 | * We cannot instantiate it ourselves because the alias may contain |
michael@0 | 1239 | * filters or compounds, which we do not understand. Caller should |
michael@0 | 1240 | * make aliasReturn empty before calling. |
michael@0 | 1241 | * |
michael@0 | 1242 | * The entry object is assumed to reside in the dynamic store. It may be |
michael@0 | 1243 | * modified. |
michael@0 | 1244 | */ |
michael@0 | 1245 | Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID, |
michael@0 | 1246 | TransliteratorEntry *entry, |
michael@0 | 1247 | TransliteratorAlias* &aliasReturn, |
michael@0 | 1248 | UErrorCode& status) { |
michael@0 | 1249 | Transliterator *t = 0; |
michael@0 | 1250 | U_ASSERT(aliasReturn == 0); |
michael@0 | 1251 | |
michael@0 | 1252 | switch (entry->entryType) { |
michael@0 | 1253 | case TransliteratorEntry::RBT_DATA: |
michael@0 | 1254 | t = new RuleBasedTransliterator(ID, entry->u.data); |
michael@0 | 1255 | if (t == 0) { |
michael@0 | 1256 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1257 | } |
michael@0 | 1258 | return t; |
michael@0 | 1259 | case TransliteratorEntry::PROTOTYPE: |
michael@0 | 1260 | t = entry->u.prototype->clone(); |
michael@0 | 1261 | if (t == 0) { |
michael@0 | 1262 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1263 | } |
michael@0 | 1264 | return t; |
michael@0 | 1265 | case TransliteratorEntry::ALIAS: |
michael@0 | 1266 | aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter); |
michael@0 | 1267 | if (aliasReturn == 0) { |
michael@0 | 1268 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1269 | } |
michael@0 | 1270 | return 0; |
michael@0 | 1271 | case TransliteratorEntry::FACTORY: |
michael@0 | 1272 | t = entry->u.factory.function(ID, entry->u.factory.context); |
michael@0 | 1273 | if (t == 0) { |
michael@0 | 1274 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1275 | } |
michael@0 | 1276 | return t; |
michael@0 | 1277 | case TransliteratorEntry::COMPOUND_RBT: |
michael@0 | 1278 | { |
michael@0 | 1279 | UVector* rbts = new UVector(entry->u.dataVector->size(), status); |
michael@0 | 1280 | // Check for null pointer |
michael@0 | 1281 | if (rbts == NULL) { |
michael@0 | 1282 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1283 | return NULL; |
michael@0 | 1284 | } |
michael@0 | 1285 | int32_t passNumber = 1; |
michael@0 | 1286 | for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) { |
michael@0 | 1287 | // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? |
michael@0 | 1288 | Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), |
michael@0 | 1289 | (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE); |
michael@0 | 1290 | if (t == 0) |
michael@0 | 1291 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1292 | else |
michael@0 | 1293 | rbts->addElement(t, status); |
michael@0 | 1294 | } |
michael@0 | 1295 | if (U_FAILURE(status)) { |
michael@0 | 1296 | delete rbts; |
michael@0 | 1297 | return 0; |
michael@0 | 1298 | } |
michael@0 | 1299 | aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter); |
michael@0 | 1300 | } |
michael@0 | 1301 | if (aliasReturn == 0) { |
michael@0 | 1302 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1303 | } |
michael@0 | 1304 | return 0; |
michael@0 | 1305 | case TransliteratorEntry::LOCALE_RULES: |
michael@0 | 1306 | aliasReturn = new TransliteratorAlias(ID, entry->stringArg, |
michael@0 | 1307 | (UTransDirection) entry->intArg); |
michael@0 | 1308 | if (aliasReturn == 0) { |
michael@0 | 1309 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1310 | } |
michael@0 | 1311 | return 0; |
michael@0 | 1312 | case TransliteratorEntry::RULES_FORWARD: |
michael@0 | 1313 | case TransliteratorEntry::RULES_REVERSE: |
michael@0 | 1314 | // Process the rule data into a TransliterationRuleData object, |
michael@0 | 1315 | // and possibly also into an ::id header and/or footer. Then |
michael@0 | 1316 | // we modify the registry with the parsed data and retry. |
michael@0 | 1317 | { |
michael@0 | 1318 | TransliteratorParser parser(status); |
michael@0 | 1319 | |
michael@0 | 1320 | // We use the file name, taken from another resource bundle |
michael@0 | 1321 | // 2-d array at static init time, as a locale language. We're |
michael@0 | 1322 | // just using the locale mechanism to map through to a file |
michael@0 | 1323 | // name; this in no way represents an actual locale. |
michael@0 | 1324 | //CharString ch(entry->stringArg); |
michael@0 | 1325 | //UResourceBundle *bundle = ures_openDirect(0, ch, &status); |
michael@0 | 1326 | UnicodeString rules = entry->stringArg; |
michael@0 | 1327 | //ures_close(bundle); |
michael@0 | 1328 | |
michael@0 | 1329 | //if (U_FAILURE(status)) { |
michael@0 | 1330 | // We have a failure of some kind. Remove the ID from the |
michael@0 | 1331 | // registry so we don't keep trying. NOTE: This will throw off |
michael@0 | 1332 | // anyone who is, at the moment, trying to iterate over the |
michael@0 | 1333 | // available IDs. That's acceptable since we should never |
michael@0 | 1334 | // really get here except under installation, configuration, |
michael@0 | 1335 | // or unrecoverable run time memory failures. |
michael@0 | 1336 | // remove(ID); |
michael@0 | 1337 | //} else { |
michael@0 | 1338 | |
michael@0 | 1339 | // If the status indicates a failure, then we don't have any |
michael@0 | 1340 | // rules -- there is probably an installation error. The list |
michael@0 | 1341 | // in the root locale should correspond to all the installed |
michael@0 | 1342 | // transliterators; if it lists something that's not |
michael@0 | 1343 | // installed, we'll get an error from ResourceBundle. |
michael@0 | 1344 | aliasReturn = new TransliteratorAlias(ID, rules, |
michael@0 | 1345 | ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ? |
michael@0 | 1346 | UTRANS_REVERSE : UTRANS_FORWARD)); |
michael@0 | 1347 | if (aliasReturn == 0) { |
michael@0 | 1348 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1349 | } |
michael@0 | 1350 | //} |
michael@0 | 1351 | } |
michael@0 | 1352 | return 0; |
michael@0 | 1353 | default: |
michael@0 | 1354 | U_ASSERT(FALSE); // can't get here |
michael@0 | 1355 | return 0; |
michael@0 | 1356 | } |
michael@0 | 1357 | } |
michael@0 | 1358 | U_NAMESPACE_END |
michael@0 | 1359 | |
michael@0 | 1360 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
michael@0 | 1361 | |
michael@0 | 1362 | //eof |