michael@0: /*
michael@0: **********************************************************************
michael@0: * Copyright (C) 1999-2007, International Business Machines Corporation
michael@0: * and others. All Rights Reserved.
michael@0: **********************************************************************
michael@0: * Date Name Description
michael@0: * 11/17/99 aliu Creation.
michael@0: **********************************************************************
michael@0: */
michael@0: #ifndef RBT_SET_H
michael@0: #define RBT_SET_H
michael@0:
michael@0: #include "unicode/utypes.h"
michael@0:
michael@0: #if !UCONFIG_NO_TRANSLITERATION
michael@0:
michael@0: #include "unicode/uobject.h"
michael@0: #include "unicode/utrans.h"
michael@0: #include "uvector.h"
michael@0:
michael@0: U_NAMESPACE_BEGIN
michael@0:
michael@0: class Replaceable;
michael@0: class TransliterationRule;
michael@0: class TransliterationRuleData;
michael@0: class UnicodeFilter;
michael@0: class UnicodeString;
michael@0: class UnicodeSet;
michael@0:
michael@0: /**
michael@0: * A set of rules for a RuleBasedTransliterator.
michael@0: *
michael@0: * Intended life cycle, as described by the member documentation below:
michael@0: * (1) add rules in order with addRule(); (2) call freeze() to sort and
michael@0: * index them; (3) use the set, e.g. through transliterate().
michael@0: * Copy assignment is declared private; the copy constructor is public.
michael@0: * @author Alan Liu
michael@0: */
michael@0: class TransliterationRuleSet : public UMemory {
michael@0: /**
michael@0: * Vector of rules, in the order added. This is used while the
michael@0: * rule set is getting built. After that, freeze() reorders and
michael@0: * indexes the rules into rules[]. Any given rule is stored once
michael@0: * in ruleVector, and one or more times in rules[]. ruleVector
michael@0: * owns and deletes the rules.
michael@0: */
michael@0: UVector* ruleVector;
michael@0:
michael@0: /**
michael@0: * Sorted and indexed table of rules. This is created by freeze()
michael@0: * from the rules in ruleVector. It contains alias (non-owning)
michael@0: * pointers to the rules in ruleVector. It is zero before freeze()
michael@0: * is called and non-zero thereafter.
michael@0: */
michael@0: TransliterationRule** rules;
michael@0:
michael@0: /**
michael@0: * Index table. For text having a first character c, compute x = c&0xFF.
michael@0: * Now use rules[index[x]..index[x+1]-1]. This index table is created by
michael@0: * freeze(). Before freeze() is called it contains garbage.
michael@0: * The table has 257 entries because x ranges over 0..255 and the
michael@0: * lookup also reads index[x+1], so index[256] must exist.
michael@0: */
michael@0: int32_t index[257];
michael@0:
michael@0: /**
michael@0: * Length of the longest preceding context.
michael@0: * Presumably the value reported by getMaximumContextLength();
michael@0: * the accessor body is not visible in this header.
michael@0: */
michael@0: int32_t maxContextLength;
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Construct a new empty rule set.
michael@0: * @param status Output parameter filled in with success or failure status.
michael@0: */
michael@0: TransliterationRuleSet(UErrorCode& status);
michael@0:
michael@0: /**
michael@0: * Copy constructor.
michael@0: * NOTE(review): how ruleVector and rules[] are copied is decided by
michael@0: * the implementation file and cannot be determined from this header.
michael@0: */
michael@0: TransliterationRuleSet(const TransliterationRuleSet&);
michael@0:
michael@0: /**
michael@0: * Destructor.
michael@0: */
michael@0: virtual ~TransliterationRuleSet();
michael@0:
michael@0: /**
michael@0: * Change the data object that this rule belongs to. Used
michael@0: * internally by the TransliterationRuleData copy constructor.
michael@0: * @param data the new data value to be set.
michael@0: */
michael@0: void setData(const TransliterationRuleData* data);
michael@0:
michael@0: /**
michael@0: * Return the maximum context length.
michael@0: * @return the length of the longest preceding context.
michael@0: */
michael@0: virtual int32_t getMaximumContextLength(void) const;
michael@0:
michael@0: /**
michael@0: * Add a rule to this set. Rules are added in order, and order is
michael@0: * significant. The last call to this method must be followed by
michael@0: * a call to freeze() before the rule set is used.
michael@0: * This method must not be called after freeze() has been
michael@0: * called.
michael@0: * NOTE(review): the freeze() documentation instead says that calling
michael@0: * addRule() after freeze() invalidates the object and requires
michael@0: * freeze() to be called again; the two statements disagree and
michael@0: * should be reconciled against the implementation.
michael@0: *
michael@0: * @param adoptedRule the rule to add. The ruleVector documentation
michael@0: * states that the vector owns and deletes the rules it stores, so
michael@0: * ownership presumably passes to this set; confirm in the
michael@0: * implementation, including what happens on failure.
michael@0: * @param status error code reference; presumably filled in with success
michael@0: * or failure status as for the constructor and freeze() (confirm).
michael@0: */
michael@0: virtual void addRule(TransliterationRule* adoptedRule,
michael@0: UErrorCode& status);
michael@0:
michael@0: /**
michael@0: * Check this for masked rules and index it to optimize performance.
michael@0: * The sequence of operations is: (1) add rules to a set using
michael@0: * addRule(); (2) freeze the set using freeze(); (3) use the rule set.
michael@0: * If addRule() is called after calling this method, it
michael@0: * invalidates this object, and this method must be called again.
michael@0: * That is, freeze() may be called multiple times,
michael@0: * although for optimal performance it shouldn't be.
michael@0: * @param parseError Reference to a UParseError that receives information
michael@0: * about any error that occurred.
michael@0: * @param status Output parameter filled in with success or failure status.
michael@0: */
michael@0: virtual void freeze(UParseError& parseError, UErrorCode& status);
michael@0:
michael@0: /**
michael@0: * Transliterate the given text with the given UTransPosition
michael@0: * indices. Return TRUE if the transliteration should continue
michael@0: * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
michael@0: * Note that FALSE is only ever returned if isIncremental is TRUE.
michael@0: * @param text the text to be transliterated
michael@0: * @param index the position indices, which will be updated
michael@0: * @param isIncremental if TRUE, assume new text may be inserted
michael@0: * at index.limit, and return FALSE if there is a partial match.
michael@0: * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
michael@0: * indicating that transliteration should stop until more text
michael@0: * arrives.
michael@0: */
michael@0: UBool transliterate(Replaceable& text,
michael@0: UTransPosition& index,
michael@0: UBool isIncremental);
michael@0:
michael@0: /**
michael@0: * Create rule strings that represent this rule set.
michael@0: * @param result string to receive the rule strings. Current
michael@0: * contents will be deleted.
michael@0: * @param escapeUnprintable True, will escape the unprintable characters
michael@0: * @return A reference to 'result'.
michael@0: */
michael@0: virtual UnicodeString& toRules(UnicodeString& result,
michael@0: UBool escapeUnprintable) const;
michael@0:
michael@0: /**
michael@0: * Return the set of all characters that may be modified
michael@0: * (getTarget=false) or emitted (getTarget=true) by this set.
michael@0: * @param result set that receives the characters.
michael@0: * NOTE(review): whether prior contents of result are cleared first is
michael@0: * not visible in this header; confirm in the implementation.
michael@0: * @param getTarget selects the emitted characters when true and the
michael@0: * modifiable characters when false.
michael@0: * @return presumably a reference to result, mirroring toRules() (confirm).
michael@0: */
michael@0: UnicodeSet& getSourceTargetSet(UnicodeSet& result,
michael@0: UBool getTarget) const;
michael@0:
michael@0: private:
michael@0:
michael@0: TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // declared private to forbid copy assignment of this class (the copy constructor above stays public)
michael@0: };
michael@0:
michael@0: U_NAMESPACE_END
michael@0:
michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */
michael@0:
michael@0: #endif