michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1999-2007, International Business Machines Corporation michael@0: * and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 11/17/99 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: #ifndef RBT_SET_H michael@0: #define RBT_SET_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/utrans.h" michael@0: #include "uvector.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class Replaceable; michael@0: class TransliterationRule; michael@0: class TransliterationRuleData; michael@0: class UnicodeFilter; michael@0: class UnicodeString; michael@0: class UnicodeSet; michael@0: michael@0: /** michael@0: * A set of rules for a RuleBasedTransliterator. michael@0: * @author Alan Liu michael@0: */ michael@0: class TransliterationRuleSet : public UMemory { michael@0: /** michael@0: * Vector of rules, in the order added. This is used while the michael@0: * rule set is getting built. After that, freeze() reorders and michael@0: * indexes the rules into rules[]. Any given rule is stored once michael@0: * in ruleVector, and one or more times in rules[]. ruleVector michael@0: * owns and deletes the rules. michael@0: */ michael@0: UVector* ruleVector; michael@0: michael@0: /** michael@0: * Sorted and indexed table of rules. This is created by freeze() michael@0: * from the rules in ruleVector. It contains alias pointers to michael@0: * the rules in ruleVector. It is zero before freeze() is called michael@0: * and non-zero thereafter. michael@0: */ michael@0: TransliterationRule** rules; michael@0: michael@0: /** michael@0: * Index table. For text having a first character c, compute x = c&0xFF. michael@0: * Now use rules[index[x]..index[x+1]-1]. This index table is created by michael@0: * freeze(). Before freeze() is called it contains garbage. michael@0: */ michael@0: int32_t index[257]; michael@0: michael@0: /** michael@0: * Length of the longest preceding context michael@0: */ michael@0: int32_t maxContextLength; michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Construct a new empty rule set. michael@0: * @param status Output parameter filled in with success or failure status. michael@0: */ michael@0: TransliterationRuleSet(UErrorCode& status); michael@0: michael@0: /** michael@0: * Copy constructor. michael@0: */ michael@0: TransliterationRuleSet(const TransliterationRuleSet&); michael@0: michael@0: /** michael@0: * Destructor. michael@0: */ michael@0: virtual ~TransliterationRuleSet(); michael@0: michael@0: /** michael@0: * Change the data object that this rule belongs to. Used michael@0: * internally by the TransliterationRuleData copy constructor. michael@0: * @param data the new data value to be set. michael@0: */ michael@0: void setData(const TransliterationRuleData* data); michael@0: michael@0: /** michael@0: * Return the maximum context length. michael@0: * @return the length of the longest preceding context. michael@0: */ michael@0: virtual int32_t getMaximumContextLength(void) const; michael@0: michael@0: /** michael@0: * Add a rule to this set. Rules are added in order, and order is michael@0: * significant. The last call to this method must be followed by michael@0: * a call to freeze() before the rule set is used. michael@0: * This method must not be called after freeze() has been michael@0: * called. michael@0: * michael@0: * @param adoptedRule the rule to add michael@0: */ michael@0: virtual void addRule(TransliterationRule* adoptedRule, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * Check this for masked rules and index it to optimize performance. michael@0: * The sequence of operations is: (1) add rules to a set using michael@0: * addRule(); (2) freeze the set using michael@0: * freeze(); (3) use the rule set. If michael@0: * addRule() is called after calling this method, it michael@0: * invalidates this object, and this method must be called again. michael@0: * That is, freeze() may be called multiple times, michael@0: * although for optimal performance it shouldn't be. michael@0: * @param parseError A pointer to UParseError to receive information about errors michael@0: * occurred. michael@0: * @param status Output parameter filled in with success or failure status. michael@0: */ michael@0: virtual void freeze(UParseError& parseError, UErrorCode& status); michael@0: michael@0: /** michael@0: * Transliterate the given text with the given UTransPosition michael@0: * indices. Return TRUE if the transliteration should continue michael@0: * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). michael@0: * Note that FALSE is only ever returned if isIncremental is TRUE. michael@0: * @param text the text to be transliterated michael@0: * @param index the position indices, which will be updated michael@0: * @param isIncremental if TRUE, assume new text may be inserted michael@0: * at index.limit, and return FALSE if thre is a partial match. michael@0: * @return TRUE unless a U_PARTIAL_MATCH has been obtained, michael@0: * indicating that transliteration should stop until more text michael@0: * arrives. michael@0: */ michael@0: UBool transliterate(Replaceable& text, michael@0: UTransPosition& index, michael@0: UBool isIncremental); michael@0: michael@0: /** michael@0: * Create rule strings that represents this rule set. michael@0: * @param result string to receive the rule strings. Current michael@0: * contents will be deleted. michael@0: * @param escapeUnprintable True, will escape the unprintable characters michael@0: * @return A reference to 'result'. michael@0: */ michael@0: virtual UnicodeString& toRules(UnicodeString& result, michael@0: UBool escapeUnprintable) const; michael@0: michael@0: /** michael@0: * Return the set of all characters that may be modified michael@0: * (getTarget=false) or emitted (getTarget=true) by this set. michael@0: */ michael@0: UnicodeSet& getSourceTargetSet(UnicodeSet& result, michael@0: UBool getTarget) const; michael@0: michael@0: private: michael@0: michael@0: TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class michael@0: }; michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */ michael@0: michael@0: #endif