michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1999-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 11/17/99 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: michael@0: #include "unicode/rep.h" michael@0: #include "unicode/uniset.h" michael@0: #include "rbt_pars.h" michael@0: #include "rbt_data.h" michael@0: #include "rbt_rule.h" michael@0: #include "rbt.h" michael@0: #include "umutex.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) michael@0: michael@0: static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER; michael@0: static Replaceable *gLockedText = NULL; michael@0: michael@0: void RuleBasedTransliterator::_construct(const UnicodeString& rules, michael@0: UTransDirection direction, michael@0: UParseError& parseError, michael@0: UErrorCode& status) { michael@0: fData = 0; michael@0: isDataOwned = TRUE; michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: TransliteratorParser parser(status); michael@0: parser.parse(rules, direction, parseError, status); michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: if (parser.idBlockVector.size() != 0 || michael@0: parser.compoundFilter != NULL || michael@0: parser.dataVector.size() == 0) { michael@0: status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT michael@0: return; michael@0: } michael@0: michael@0: fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); michael@0: setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); michael@0: } michael@0: michael@0: /** michael@0: * Constructs a new transliterator from the given rules. michael@0: * @param id the id for the transliterator. michael@0: * @param rules rules, separated by ';' michael@0: * @param direction either FORWARD or REVERSE. michael@0: * @param adoptedFilter the filter for this transliterator. michael@0: * @param parseError Struct to recieve information on position michael@0: * of error if an error is encountered michael@0: * @param status Output param set to success/failure code. michael@0: * @exception IllegalArgumentException if rules are malformed michael@0: * or direction is invalid. michael@0: */ michael@0: RuleBasedTransliterator::RuleBasedTransliterator( michael@0: const UnicodeString& id, michael@0: const UnicodeString& rules, michael@0: UTransDirection direction, michael@0: UnicodeFilter* adoptedFilter, michael@0: UParseError& parseError, michael@0: UErrorCode& status) : michael@0: Transliterator(id, adoptedFilter) { michael@0: _construct(rules, direction,parseError,status); michael@0: } michael@0: michael@0: /** michael@0: * Constructs a new transliterator from the given rules. michael@0: * @param id the id for the transliterator. michael@0: * @param rules rules, separated by ';' michael@0: * @param direction either FORWARD or REVERSE. michael@0: * @param adoptedFilter the filter for this transliterator. michael@0: * @param status Output param set to success/failure code. michael@0: * @exception IllegalArgumentException if rules are malformed michael@0: * or direction is invalid. michael@0: */ michael@0: /*RuleBasedTransliterator::RuleBasedTransliterator( michael@0: const UnicodeString& id, michael@0: const UnicodeString& rules, michael@0: UTransDirection direction, michael@0: UnicodeFilter* adoptedFilter, michael@0: UErrorCode& status) : michael@0: Transliterator(id, adoptedFilter) { michael@0: UParseError parseError; michael@0: _construct(rules, direction,parseError, status); michael@0: }*/ michael@0: michael@0: /** michael@0: * Covenience constructor with no filter. michael@0: */ michael@0: /*RuleBasedTransliterator::RuleBasedTransliterator( michael@0: const UnicodeString& id, michael@0: const UnicodeString& rules, michael@0: UTransDirection direction, michael@0: UErrorCode& status) : michael@0: Transliterator(id, 0) { michael@0: UParseError parseError; michael@0: _construct(rules, direction,parseError, status); michael@0: }*/ michael@0: michael@0: /** michael@0: * Covenience constructor with no filter and FORWARD direction. michael@0: */ michael@0: /*RuleBasedTransliterator::RuleBasedTransliterator( michael@0: const UnicodeString& id, michael@0: const UnicodeString& rules, michael@0: UErrorCode& status) : michael@0: Transliterator(id, 0) { michael@0: UParseError parseError; michael@0: _construct(rules, UTRANS_FORWARD, parseError, status); michael@0: }*/ michael@0: michael@0: /** michael@0: * Covenience constructor with FORWARD direction. michael@0: */ michael@0: /*RuleBasedTransliterator::RuleBasedTransliterator( michael@0: const UnicodeString& id, michael@0: const UnicodeString& rules, michael@0: UnicodeFilter* adoptedFilter, michael@0: UErrorCode& status) : michael@0: Transliterator(id, adoptedFilter) { michael@0: UParseError parseError; michael@0: _construct(rules, UTRANS_FORWARD,parseError, status); michael@0: }*/ michael@0: michael@0: RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, michael@0: const TransliterationRuleData* theData, michael@0: UnicodeFilter* adoptedFilter) : michael@0: Transliterator(id, adoptedFilter), michael@0: fData((TransliterationRuleData*)theData), // cast away const michael@0: isDataOwned(FALSE) { michael@0: setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); michael@0: } michael@0: michael@0: /** michael@0: * Internal constructor. michael@0: */ michael@0: RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, michael@0: TransliterationRuleData* theData, michael@0: UBool isDataAdopted) : michael@0: Transliterator(id, 0), michael@0: fData(theData), michael@0: isDataOwned(isDataAdopted) { michael@0: setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); michael@0: } michael@0: michael@0: /** michael@0: * Copy constructor. michael@0: */ michael@0: RuleBasedTransliterator::RuleBasedTransliterator( michael@0: const RuleBasedTransliterator& other) : michael@0: Transliterator(other), fData(other.fData), michael@0: isDataOwned(other.isDataOwned) { michael@0: michael@0: // The data object may or may not be owned. If it is not owned we michael@0: // share it; it is invariant. If it is owned, it's still michael@0: // invariant, but we need to copy it to prevent double-deletion. michael@0: // If this becomes a performance issue (if people do a lot of RBT michael@0: // copying -- unlikely) we can reference count the data object. michael@0: michael@0: // Only do a deep copy if this is owned data, that is, data that michael@0: // will be later deleted. System transliterators contain michael@0: // non-owned data. michael@0: if (isDataOwned) { michael@0: fData = new TransliterationRuleData(*other.fData); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Destructor. michael@0: */ michael@0: RuleBasedTransliterator::~RuleBasedTransliterator() { michael@0: // Delete the data object only if we own it. michael@0: if (isDataOwned) { michael@0: delete fData; michael@0: } michael@0: } michael@0: michael@0: Transliterator* // Covariant return NOT ALLOWED (for portability) michael@0: RuleBasedTransliterator::clone(void) const { michael@0: return new RuleBasedTransliterator(*this); michael@0: } michael@0: michael@0: /** michael@0: * Implements {@link Transliterator#handleTransliterate}. michael@0: */ michael@0: void michael@0: RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, michael@0: UBool isIncremental) const { michael@0: /* We keep contextStart and contextLimit fixed the entire time, michael@0: * relative to the text -- contextLimit may move numerically if michael@0: * text is inserted or removed. The start offset moves toward michael@0: * limit, with replacements happening under it. michael@0: * michael@0: * Example: rules 1. ab>x|y michael@0: * 2. yc>z michael@0: * michael@0: * |eabcd begin - no match, advance start michael@0: * e|abcd match rule 1 - change text & adjust start michael@0: * ex|ycd match rule 2 - change text & adjust start michael@0: * exz|d no match, advance start michael@0: * exzd| done michael@0: */ michael@0: michael@0: /* A rule like michael@0: * a>b|a michael@0: * creates an infinite loop. To prevent that, we put an arbitrary michael@0: * limit on the number of iterations that we take, one that is michael@0: * high enough that any reasonable rules are ok, but low enough to michael@0: * prevent a server from hanging. The limit is 16 times the michael@0: * number of characters n, unless n is so large that 16n exceeds a michael@0: * uint32_t. michael@0: */ michael@0: uint32_t loopCount = 0; michael@0: uint32_t loopLimit = index.limit - index.start; michael@0: if (loopLimit >= 0x10000000) { michael@0: loopLimit = 0xFFFFFFFF; michael@0: } else { michael@0: loopLimit <<= 4; michael@0: } michael@0: michael@0: // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent michael@0: // operations must be prevented. michael@0: // A Complication: compound transliterators can result in recursive entries to this michael@0: // function, sometimes with different "This" objects, always with the same text. michael@0: // Double-locking must be prevented in these cases. michael@0: // michael@0: michael@0: // If the transliteration data is exclusively owned by this transliterator object, michael@0: // we don't need to do any locking. No sharing between transliterators is possible, michael@0: // so no concurrent access from multiple threads is possible. michael@0: UBool lockedMutexAtThisLevel = FALSE; michael@0: if (isDataOwned == FALSE) { michael@0: // Test whether this request is operating on the same text string as some michael@0: // some other transliteration that is still in progress and holding the michael@0: // transliteration mutex. If so, do not lock the transliteration michael@0: // mutex again. michael@0: // TODO(andy): Need a better scheme for handling this. michael@0: UBool needToLock; michael@0: umtx_lock(NULL); michael@0: needToLock = (&text != gLockedText); michael@0: umtx_unlock(NULL); michael@0: if (needToLock) { michael@0: umtx_lock(&transliteratorDataMutex); michael@0: gLockedText = &text; michael@0: lockedMutexAtThisLevel = TRUE; michael@0: } michael@0: } michael@0: michael@0: // Check to make sure we don't dereference a null pointer. michael@0: if (fData != NULL) { michael@0: while (index.start < index.limit && michael@0: loopCount <= loopLimit && michael@0: fData->ruleSet.transliterate(text, index, isIncremental)) { michael@0: ++loopCount; michael@0: } michael@0: } michael@0: if (lockedMutexAtThisLevel) { michael@0: gLockedText = NULL; michael@0: umtx_unlock(&transliteratorDataMutex); michael@0: } michael@0: } michael@0: michael@0: UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, michael@0: UBool escapeUnprintable) const { michael@0: return fData->ruleSet.toRules(rulesSource, escapeUnprintable); michael@0: } michael@0: michael@0: /** michael@0: * Implement Transliterator framework michael@0: */ michael@0: void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { michael@0: fData->ruleSet.getSourceTargetSet(result, FALSE); michael@0: } michael@0: michael@0: /** michael@0: * Override Transliterator framework michael@0: */ michael@0: UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { michael@0: return fData->ruleSet.getSourceTargetSet(result, TRUE); michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */