intl/icu/source/i18n/rbt_set.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/rbt_set.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,165 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (C) 1999-2007, International Business Machines Corporation
     1.7 +* and others. All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   11/17/99    aliu        Creation.
    1.11 +**********************************************************************
    1.12 +*/
    1.13 +#ifndef RBT_SET_H
    1.14 +#define RBT_SET_H
    1.15 +
    1.16 +#include "unicode/utypes.h"
    1.17 +
    1.18 +#if !UCONFIG_NO_TRANSLITERATION
    1.19 +
    1.20 +#include "unicode/uobject.h"
    1.21 +#include "unicode/utrans.h"
    1.22 +#include "uvector.h"
    1.23 +
    1.24 +U_NAMESPACE_BEGIN
    1.25 +
    1.26 +class Replaceable;
    1.27 +class TransliterationRule;
    1.28 +class TransliterationRuleData;
    1.29 +class UnicodeFilter;
    1.30 +class UnicodeString;
    1.31 +class UnicodeSet;
    1.32 +
    1.33 +/**
    1.34 + * A set of rules for a <code>RuleBasedTransliterator</code>.
    1.35 + * @author Alan Liu
    1.36 + */
    1.37 +class TransliterationRuleSet : public UMemory {
    1.38 +    /**
    1.39 +     * Vector of rules, in the order added.  This is used while the
    1.40 +     * rule set is getting built.  After that, freeze() reorders and
    1.41 +     * indexes the rules into rules[].  Any given rule is stored once
    1.42 +     * in ruleVector, and one or more times in rules[].  ruleVector
    1.43 +     * owns and deletes the rules.
    1.44 +     */
    1.45 +    UVector* ruleVector;
    1.46 +
    1.47 +    /**
    1.48 +     * Sorted and indexed table of rules.  This is created by freeze()
    1.49 +     * from the rules in ruleVector.  It contains alias pointers to
    1.50 +     * the rules in ruleVector.  It is zero before freeze() is called
    1.51 +     * and non-zero thereafter.
    1.52 +     */
    1.53 +    TransliterationRule** rules;
    1.54 +
    1.55 +    /**
    1.56 +     * Index table.  For text having a first character c, compute x = c&0xFF.
    1.57 +     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
    1.58 +     * freeze().  Before freeze() is called it contains garbage.
    1.59 +     */
    1.60 +    int32_t index[257];
    1.61 +
    1.62 +    /**
    1.63 +     * Length of the longest preceding context
    1.64 +     */
    1.65 +    int32_t maxContextLength;
    1.66 +
    1.67 +public:
    1.68 +
    1.69 +    /**
    1.70 +     * Construct a new empty rule set.
    1.71 +     * @param status    Output parameter filled in with success or failure status.
    1.72 +     */
    1.73 +    TransliterationRuleSet(UErrorCode& status);
    1.74 +
    1.75 +    /**
    1.76 +     * Copy constructor.
    1.77 +     */
    1.78 +    TransliterationRuleSet(const TransliterationRuleSet&);
    1.79 +
    1.80 +    /**
    1.81 +     * Destructor.
    1.82 +     */
    1.83 +    virtual ~TransliterationRuleSet();
    1.84 +
    1.85 +    /**
    1.86 +     * Change the data object that this rule belongs to.  Used
    1.87 +     * internally by the TransliterationRuleData copy constructor.
    1.88 +     * @param data    the new data value to be set.
    1.89 +     */
    1.90 +    void setData(const TransliterationRuleData* data);
    1.91 +
    1.92 +    /**
    1.93 +     * Return the maximum context length.
    1.94 +     * @return the length of the longest preceding context.
    1.95 +     */
    1.96 +    virtual int32_t getMaximumContextLength(void) const;
    1.97 +
    1.98 +    /**
    1.99 +     * Add a rule to this set.  Rules are added in order, and order is
   1.100 +     * significant.  The last call to this method must be followed by
   1.101 +     * a call to <code>freeze()</code> before the rule set is used.
   1.102 +     * This method must <em>not</em> be called after freeze() has been
   1.103 +     * called.
   1.104 +     * @param adoptedRule the rule to add; this set takes ownership of it
   1.105 +     * @param status      Output parameter filled in with success or failure status.
   1.106 +     */
   1.107 +    virtual void addRule(TransliterationRule* adoptedRule,
   1.108 +                         UErrorCode& status);
   1.109 +
   1.110 +    /**
   1.111 +     * Check this for masked rules and index it to optimize performance.
   1.112 +     * The sequence of operations is: (1) add rules to a set using
   1.113 +     * <code>addRule()</code>; (2) freeze the set using
   1.114 +     * <code>freeze()</code>; (3) use the rule set.  If
   1.115 +     * <code>addRule()</code> is called after calling this method, it
   1.116 +     * invalidates this object, and this method must be called again.
   1.117 +     * That is, <code>freeze()</code> may be called multiple times,
   1.118 +     * although for optimal performance it shouldn't be.
   1.119 +     * @param parseError A reference to a UParseError that receives information
   1.120 +     *                   about any error that occurred.
   1.121 +     * @param status     Output parameter filled in with success or failure status.
   1.122 +     */
   1.123 +    virtual void freeze(UParseError& parseError, UErrorCode& status);
   1.124 +    
   1.125 +    /**
   1.126 +     * Transliterate the given text with the given UTransPosition
   1.127 +     * indices.  Return TRUE if the transliteration should continue
   1.128 +     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
   1.129 +     * Note that FALSE is only ever returned if isIncremental is TRUE.
   1.130 +     * @param text the text to be transliterated
   1.131 +     * @param index the position indices, which will be updated
   1.132 +     * @param isIncremental if TRUE, assume new text may be inserted
   1.133 +     * at index.limit, and return FALSE if there is a partial match.
   1.134 +     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
   1.135 +     * indicating that transliteration should stop until more text
   1.136 +     * arrives.
   1.137 +     */
   1.138 +    UBool transliterate(Replaceable& text,
   1.139 +                        UTransPosition& index,
   1.140 +                        UBool isIncremental);
   1.141 +
   1.142 +    /**
   1.143 +     * Create rule strings that represent this rule set.
   1.144 +     * @param result string to receive the rule strings.  Current
   1.145 +     * contents will be deleted.
   1.146 +     * @param escapeUnprintable  if TRUE, escape unprintable characters
   1.147 +     * @return    A reference to 'result'.
   1.148 +     */
   1.149 +    virtual UnicodeString& toRules(UnicodeString& result,
   1.150 +                                   UBool escapeUnprintable) const;
   1.151 +
   1.152 +    /**
   1.153 +     * Return the set of all characters that may be modified
   1.154 +     * (getTarget=false) or emitted (getTarget=true) by this set.
   1.155 +     */
   1.156 +    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
   1.157 +                   UBool getTarget) const;
   1.158 +
   1.159 +private:
   1.160 +
   1.161 +    TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid assignment of this class
   1.162 +};
   1.163 +
   1.164 +U_NAMESPACE_END
   1.165 +
   1.166 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
   1.167 +
   1.168 +#endif

mercurial