1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/rbt_set.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,165 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1999-2007, International Business Machines Corporation 1.7 +* and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Date Name Description 1.10 +* 11/17/99 aliu Creation. 1.11 +********************************************************************** 1.12 +*/ 1.13 +#ifndef RBT_SET_H 1.14 +#define RBT_SET_H 1.15 + 1.16 +#include "unicode/utypes.h" 1.17 + 1.18 +#if !UCONFIG_NO_TRANSLITERATION 1.19 + 1.20 +#include "unicode/uobject.h" 1.21 +#include "unicode/utrans.h" 1.22 +#include "uvector.h" 1.23 + 1.24 +U_NAMESPACE_BEGIN 1.25 + 1.26 +class Replaceable; 1.27 +class TransliterationRule; 1.28 +class TransliterationRuleData; 1.29 +class UnicodeFilter; 1.30 +class UnicodeString; 1.31 +class UnicodeSet; 1.32 + 1.33 +/** 1.34 + * A set of rules for a <code>RuleBasedTransliterator</code>. 1.35 + * @author Alan Liu 1.36 + */ 1.37 +class TransliterationRuleSet : public UMemory { 1.38 + /** 1.39 + * Vector of rules, in the order added. This is used while the 1.40 + * rule set is getting built. After that, freeze() reorders and 1.41 + * indexes the rules into rules[]. Any given rule is stored once 1.42 + * in ruleVector, and one or more times in rules[]. ruleVector 1.43 + * owns and deletes the rules. 1.44 + */ 1.45 + UVector* ruleVector; 1.46 + 1.47 + /** 1.48 + * Sorted and indexed table of rules. This is created by freeze() 1.49 + * from the rules in ruleVector. It contains alias pointers to 1.50 + * the rules in ruleVector. It is zero before freeze() is called 1.51 + * and non-zero thereafter. 1.52 + */ 1.53 + TransliterationRule** rules; 1.54 + 1.55 + /** 1.56 + * Index table. For text having a first character c, compute x = c&0xFF. 
1.57 + * Now use rules[index[x]..index[x+1]-1]. This index table is created by 1.58 + * freeze(). Before freeze() is called it contains garbage. Since x is at most 255 and index[x+1] is read, the table needs 257 entries. 1.59 + */ 1.60 + int32_t index[257]; 1.61 + 1.62 + /** 1.63 + * Length of the longest preceding context 1.64 + */ 1.65 + int32_t maxContextLength; 1.66 + 1.67 +public: 1.68 + 1.69 + /** 1.70 + * Construct a new empty rule set. 1.71 + * @param status Output parameter filled in with success or failure status. 1.72 + */ 1.73 + TransliterationRuleSet(UErrorCode& status); 1.74 + 1.75 + /** 1.76 + * Copy constructor. 1.77 + */ 1.78 + TransliterationRuleSet(const TransliterationRuleSet&); 1.79 + 1.80 + /** 1.81 + * Destructor. 1.82 + */ 1.83 + virtual ~TransliterationRuleSet(); 1.84 + 1.85 + /** 1.86 + * Change the data object that this rule belongs to. Used 1.87 + * internally by the TransliterationRuleData copy constructor. 1.88 + * @param data the new data value to be set. 1.89 + */ 1.90 + void setData(const TransliterationRuleData* data); 1.91 + 1.92 + /** 1.93 + * Return the maximum context length. 1.94 + * @return the length of the longest preceding context. 1.95 + */ 1.96 + virtual int32_t getMaximumContextLength(void) const; 1.97 + 1.98 + /** 1.99 + * Add a rule to this set. Rules are added in order, and order is 1.100 + * significant. The last call to this method must be followed by 1.101 + * a call to <code>freeze()</code> before the rule set is used. 1.102 + * This method must <em>not</em> be called after freeze() has been 1.103 + * called. NOTE(review): the freeze() documentation instead says addRule() may be called after freeze() provided freeze() is then called again; confirm which contract holds. 1.104 + * @param adoptedRule the rule to add 1.105 + * @param status presumably an output parameter filled in with success or failure status, as documented for the constructor and freeze(); confirm against the implementation. 1.106 + */ 1.107 + virtual void addRule(TransliterationRule* adoptedRule, 1.108 + UErrorCode& status); 1.109 + 1.110 + /** 1.111 + * Check this for masked rules and index it to optimize performance. 1.112 + * The sequence of operations is: (1) add rules to a set using 1.113 + * <code>addRule()</code>; (2) freeze the set using 1.114 + * <code>freeze()</code>; (3) use the rule set. 
If 1.115 + * <code>addRule()</code> is called after calling this method, it 1.116 + * invalidates this object, and this method must be called again. 1.117 + * That is, <code>freeze()</code> may be called multiple times, 1.118 + * although for optimal performance it shouldn't be. 1.119 + * @param parseError A reference to a UParseError that receives information about any errors 1.120 + * that occurred. 1.121 + * @param status Output parameter filled in with success or failure status. 1.122 + */ 1.123 + virtual void freeze(UParseError& parseError, UErrorCode& status); 1.124 + 1.125 + /** 1.126 + * Transliterate the given text with the given UTransPosition 1.127 + * indices. Return TRUE if the transliteration should continue 1.128 + * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). 1.129 + * Note that FALSE is only ever returned if isIncremental is TRUE. 1.130 + * @param text the text to be transliterated 1.131 + * @param index the position indices, which will be updated 1.132 + * @param isIncremental if TRUE, assume new text may be inserted 1.133 + * at index.limit, and return FALSE if there is a partial match. 1.134 + * @return TRUE unless a U_PARTIAL_MATCH has been obtained, 1.135 + * indicating that transliteration should stop until more text 1.136 + * arrives. 1.137 + */ 1.138 + UBool transliterate(Replaceable& text, 1.139 + UTransPosition& index, 1.140 + UBool isIncremental); 1.141 + 1.142 + /** 1.143 + * Create rule strings that represent this rule set. 1.144 + * @param result string to receive the rule strings. Current 1.145 + * contents will be deleted. 1.146 + * @param escapeUnprintable if TRUE, unprintable characters are escaped 1.147 + * @return A reference to 'result'. 1.148 + */ 1.149 + virtual UnicodeString& toRules(UnicodeString& result, 1.150 + UBool escapeUnprintable) const; 1.151 + 1.152 + /** 1.153 + * Return the set of all characters that may be modified 1.154 + * (getTarget=false) or emitted (getTarget=true) by this set. 
1.155 + */ 1.156 + UnicodeSet& getSourceTargetSet(UnicodeSet& result, 1.157 + UBool getTarget) const; 1.158 + 1.159 +private: 1.160 + 1.161 + TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // private: forbids assignment of this class (the copy constructor is public) 1.162 +}; 1.163 + 1.164 +U_NAMESPACE_END 1.165 + 1.166 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1.167 + 1.168 +#endif