Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (C) 1999-2007, International Business Machines Corporation |
michael@0 | 4 | * and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | * Date Name Description |
michael@0 | 7 | * 11/17/99 aliu Creation. |
michael@0 | 8 | ********************************************************************** |
michael@0 | 9 | */ |
michael@0 | 10 | #ifndef RBT_SET_H |
michael@0 | 11 | #define RBT_SET_H |
michael@0 | 12 | |
michael@0 | 13 | #include "unicode/utypes.h" |
michael@0 | 14 | |
michael@0 | 15 | #if !UCONFIG_NO_TRANSLITERATION |
michael@0 | 16 | |
michael@0 | 17 | #include "unicode/uobject.h" |
michael@0 | 18 | #include "unicode/utrans.h" |
michael@0 | 19 | #include "uvector.h" |
michael@0 | 20 | |
michael@0 | 21 | U_NAMESPACE_BEGIN |
michael@0 | 22 | |
michael@0 | 23 | class Replaceable; |
michael@0 | 24 | class TransliterationRule; |
michael@0 | 25 | class TransliterationRuleData; |
michael@0 | 26 | class UnicodeFilter; |
michael@0 | 27 | class UnicodeString; |
michael@0 | 28 | class UnicodeSet; |
michael@0 | 29 | |
michael@0 | 30 | /** |
michael@0 | 31 | * A set of rules for a <code>RuleBasedTransliterator</code>. |
michael@0 | 32 | * @author Alan Liu |
michael@0 | 33 | */ |
michael@0 | 34 | class TransliterationRuleSet : public UMemory { |
michael@0 | 35 | /** |
michael@0 | 36 | * Vector of rules, in the order added. This is used while the |
michael@0 | 37 | * rule set is getting built. After that, freeze() reorders and |
michael@0 | 38 | * indexes the rules into rules[]. Any given rule is stored once |
michael@0 | 39 | * in ruleVector, and one or more times in rules[]. ruleVector |
michael@0 | 40 | * owns and deletes the rules. |
michael@0 | 41 | */ |
michael@0 | 42 | UVector* ruleVector; |
michael@0 | 43 | |
michael@0 | 44 | /** |
michael@0 | 45 | * Sorted and indexed table of rules. This is created by freeze() |
michael@0 | 46 | * from the rules in ruleVector. It contains alias pointers to |
michael@0 | 47 | * the rules in ruleVector. It is zero before freeze() is called |
michael@0 | 48 | * and non-zero thereafter. |
michael@0 | 49 | */ |
michael@0 | 50 | TransliterationRule** rules; |
michael@0 | 51 | |
michael@0 | 52 | /** |
michael@0 | 53 | * Index table. For text whose first character is c, let x = c&0xFF and |
michael@0 | 54 | * use rules[index[x]..index[x+1]-1]; entry 256 keeps index[x+1] valid for |
michael@0 | 55 | * x == 0xFF. Created by freeze(); before freeze() it contains garbage. |
michael@0 | 56 | */ |
michael@0 | 57 | int32_t index[257]; |
michael@0 | 58 | |
michael@0 | 59 | /** |
michael@0 | 60 | * Length of the longest preceding context. |
michael@0 | 61 | */ |
michael@0 | 62 | int32_t maxContextLength; |
michael@0 | 63 | |
michael@0 | 64 | public: |
michael@0 | 65 | |
michael@0 | 66 | /** |
michael@0 | 67 | * Construct a new empty rule set. |
michael@0 | 68 | * @param status Output parameter filled in with success or failure status. |
michael@0 | 69 | */ |
michael@0 | 70 | TransliterationRuleSet(UErrorCode& status); |
michael@0 | 71 | |
michael@0 | 72 | /** |
michael@0 | 73 | * Copy constructor. |
michael@0 | 74 | */ |
michael@0 | 75 | TransliterationRuleSet(const TransliterationRuleSet&); |
michael@0 | 76 | |
michael@0 | 77 | /** |
michael@0 | 78 | * Destructor. |
michael@0 | 79 | */ |
michael@0 | 80 | virtual ~TransliterationRuleSet(); |
michael@0 | 81 | |
michael@0 | 82 | /** |
michael@0 | 83 | * Change the data object that this rule belongs to. Used |
michael@0 | 84 | * internally by the TransliterationRuleData copy constructor. |
michael@0 | 85 | * @param data the new data value to be set. |
michael@0 | 86 | */ |
michael@0 | 87 | void setData(const TransliterationRuleData* data); |
michael@0 | 88 | |
michael@0 | 89 | /** |
michael@0 | 90 | * Return the maximum context length. |
michael@0 | 91 | * @return the length of the longest preceding context. |
michael@0 | 92 | */ |
michael@0 | 93 | virtual int32_t getMaximumContextLength(void) const; |
michael@0 | 94 | |
michael@0 | 95 | /** |
michael@0 | 96 | * Add a rule to this set. Rules are added in order, and order is |
michael@0 | 97 | * significant. The last call to this method must be followed by |
michael@0 | 98 | * a call to <code>freeze()</code> before the rule set is used. |
michael@0 | 99 | * This method must <em>not</em> be called after freeze() has been called. |
michael@0 | 100 | * |
michael@0 | 101 | * @param adoptedRule the rule to add; ruleVector owns and deletes it |
michael@0 | 102 | * @param status Output parameter filled in with success or failure status. |
michael@0 | 103 | */ |
michael@0 | 104 | virtual void addRule(TransliterationRule* adoptedRule, |
michael@0 | 105 | UErrorCode& status); |
michael@0 | 106 | |
michael@0 | 107 | /** |
michael@0 | 108 | * Check this for masked rules and index it to optimize performance. |
michael@0 | 109 | * The sequence of operations is: (1) add rules to a set using |
michael@0 | 110 | * <code>addRule()</code>; (2) freeze the set using |
michael@0 | 111 | * <code>freeze()</code>; (3) use the rule set. If |
michael@0 | 112 | * <code>addRule()</code> is called after calling this method, it |
michael@0 | 113 | * invalidates this object, and this method must be called again. |
michael@0 | 114 | * That is, <code>freeze()</code> may be called multiple times, |
michael@0 | 115 | * although for optimal performance it shouldn't be. |
michael@0 | 116 | * @param parseError A reference to a UParseError that receives information |
michael@0 | 117 | * about any errors that occurred. |
michael@0 | 118 | * @param status Output parameter filled in with success or failure status. |
michael@0 | 119 | */ |
michael@0 | 120 | virtual void freeze(UParseError& parseError, UErrorCode& status); |
michael@0 | 121 | |
michael@0 | 122 | /** |
michael@0 | 123 | * Transliterate the given text with the given UTransPosition |
michael@0 | 124 | * indices. Return TRUE if the transliteration should continue |
michael@0 | 125 | * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). |
michael@0 | 126 | * Note that FALSE is only ever returned if isIncremental is TRUE. |
michael@0 | 127 | * @param text the text to be transliterated |
michael@0 | 128 | * @param index the position indices, which will be updated |
michael@0 | 129 | * @param isIncremental if TRUE, assume new text may be inserted |
michael@0 | 130 | * at index.limit, and return FALSE if there is a partial match. |
michael@0 | 131 | * @return TRUE unless a U_PARTIAL_MATCH has been obtained, |
michael@0 | 132 | * indicating that transliteration should stop until more text |
michael@0 | 133 | * arrives. |
michael@0 | 134 | */ |
michael@0 | 135 | UBool transliterate(Replaceable& text, |
michael@0 | 136 | UTransPosition& index, |
michael@0 | 137 | UBool isIncremental); |
michael@0 | 138 | |
michael@0 | 139 | /** |
michael@0 | 140 | * Create rule strings that represent this rule set. |
michael@0 | 141 | * @param result string to receive the rule strings. Current |
michael@0 | 142 | * contents will be deleted. |
michael@0 | 143 | * @param escapeUnprintable if TRUE, unprintable characters are escaped |
michael@0 | 144 | * @return A reference to 'result'. |
michael@0 | 145 | */ |
michael@0 | 146 | virtual UnicodeString& toRules(UnicodeString& result, |
michael@0 | 147 | UBool escapeUnprintable) const; |
michael@0 | 148 | |
michael@0 | 149 | /** |
michael@0 | 150 | * Return the set of all characters that may be modified |
michael@0 | 151 | * (getTarget=false) or emitted (getTarget=true) by this set. |
michael@0 | 152 | */ |
michael@0 | 153 | UnicodeSet& getSourceTargetSet(UnicodeSet& result, |
michael@0 | 154 | UBool getTarget) const; |
michael@0 | 155 | |
michael@0 | 156 | private: |
michael@0 | 157 | |
michael@0 | 158 | TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid assignment; operator= is declared private |
michael@0 | 159 | }; |
michael@0 | 160 | |
michael@0 | 161 | U_NAMESPACE_END |
michael@0 | 162 | |
michael@0 | 163 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
michael@0 | 164 | |
michael@0 | 165 | #endif |