intl/icu/source/i18n/rbt_data.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/rbt_data.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,152 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (C) 1999-2007, International Business Machines Corporation
     1.7 +* and others. All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   11/17/99    aliu        Creation.
    1.11 +**********************************************************************
    1.12 +*/
    1.13 +#ifndef RBT_DATA_H
    1.14 +#define RBT_DATA_H
    1.15 +
    1.16 +#include "unicode/utypes.h"
    1.17 +#include "unicode/uclean.h"
    1.18 +
    1.19 +#if !UCONFIG_NO_TRANSLITERATION
    1.20 +
    1.21 +#include "unicode/uobject.h"
    1.22 +#include "rbt_set.h"
    1.23 +#include "hash.h"
    1.24 +
    1.25 +U_NAMESPACE_BEGIN
    1.26 +
    1.27 +class UnicodeFunctor;
    1.28 +class UnicodeMatcher;
    1.29 +class UnicodeReplacer;
    1.30 +
    1.31 +/**
    1.32 + * The rule data for a RuleBasedTransliterator.  RBT objects hold
    1.33 + * a const pointer to a TRD object that they do not own.  TRD objects
    1.34 + * are essentially the parsed rules in compact, usable form.  The
    1.35 + * TRD objects themselves are held for the life of the process in
    1.36 + * a static cache owned by Transliterator.
    1.37 + *
    1.38 + * This class' API is a little asymmetric.  There is a method to
    1.39 + * define a variable, but no way to define a set.  This is because the
    1.40 + * sets are defined by the parser in a UVector, and the vector is
    1.41 + * copied into a fixed-size array here.  Once this is done, no new
    1.42 + * sets may be defined.  In practice, there is no need to do so, since
    1.43 + * generating the data and using it are discrete phases.  When there
    1.44 + * is a need to access the set data during the parse phase, another
    1.45 + * data structure handles this.  See the parsing code for more
    1.46 + * details.
    1.47 + */
    1.48 +class TransliterationRuleData : public UMemory {
    1.49 +
    1.50 +public:
    1.51 +
    1.52 +    // PUBLIC DATA MEMBERS
    1.53 +
    1.54 +    /**
    1.55 +     * Rule table.  May be empty.
    1.56 +     */
    1.57 +    TransliterationRuleSet ruleSet;
    1.58 +
    1.59 +    /**
    1.60 +     * Map variable name (String) to variable (UnicodeString).  A variable name
    1.61 +     * corresponds to zero or more characters, stored in a UnicodeString in
    1.62 +     * this hash.  One or more of these chars may also correspond to a
    1.63 +     * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
    1.64 +     * a stand-in: it is an index for a secondary lookup in
    1.65 +     * the variables array of this class.  The stand-in also represents the UnicodeMatcher in
    1.66 +     * the stored rules.
    1.67 +     */
    1.68 +    Hashtable variableNames;
    1.69 +
    1.70 +    /**
    1.71 +     * Map category variable (UChar) to set (UnicodeFunctor).
    1.72 +     * Variables that correspond to a set of characters are mapped
    1.73 +     * from variable name to a stand-in character in variableNames.
    1.74 +     * The stand-in then serves as an index into this array (offset by variablesBase) to look up the
    1.75 +     * actual UnicodeFunctor object.  In addition, the stand-in is
    1.76 +     * stored in the rule text to represent the set of characters.
    1.77 +     * variables[i] represents character (variablesBase + i).
    1.78 +     */
    1.79 +    UnicodeFunctor** variables;
    1.80 +
    1.81 +    /**
    1.82 +     * Flag that indicates whether the variables are owned (if a single
    1.83 +     * call to Transliterator::createFromRules() produces a CompoundTransliterator
    1.84 +     * with more than one RuleBasedTransliterator as children, they all share
    1.85 +     * the same variables list, so only the first one is considered to own
    1.86 +     * the variables).
    1.87 +     */
    1.88 +    UBool variablesAreOwned;
    1.89 +
    1.90 +    /**
    1.91 +     * The character that represents variables[0].  Characters
    1.92 +     * variablesBase through variablesBase +
    1.93 +     * variablesLength - 1 represent UnicodeFunctor objects.
    1.94 +     */
    1.95 +    UChar variablesBase;
    1.96 +
    1.97 +    /**
    1.98 +     * The number of entries in the variables array.
    1.99 +     */
   1.100 +    int32_t variablesLength;
   1.101 +
   1.102 +public:
   1.103 +
   1.104 +    /**
   1.105 +     * Constructor.
   1.106 +     * @param status Output param set to success/failure code on exit.
   1.107 +     */
   1.108 +    TransliterationRuleData(UErrorCode& status);
   1.109 +
   1.110 +    /**
   1.111 +     * Copy constructor.  NOTE(review): ownership of the copied variables entries is not evident from this header; confirm in the implementation.
   1.112 +     */
   1.113 +    TransliterationRuleData(const TransliterationRuleData&);
   1.114 +
   1.115 +    /**
   1.116 +     * Destructor.
   1.117 +     */
   1.118 +    ~TransliterationRuleData();
   1.119 +
   1.120 +    /**
   1.121 +     * Given a stand-in character, return the UnicodeFunctor that it
   1.122 +     * represents, or NULL if it doesn't represent anything.
   1.123 +     * @param standIn    the given stand-in character.
   1.124 +     * @return           the UnicodeFunctor that 'standIn' represents
   1.125 +     */
   1.126 +    UnicodeFunctor* lookup(UChar32 standIn) const;
   1.127 +
   1.128 +    /**
   1.129 +     * Given a stand-in character, return the UnicodeMatcher that it
   1.130 +     * represents, or NULL if it doesn't represent anything or if it
   1.131 +     * represents something that is not a matcher.
   1.132 +     * @param standIn    the given stand-in character.
   1.133 +     * @return           the UnicodeMatcher that 'standIn' represents
   1.134 +     */
   1.135 +    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
   1.136 +
   1.137 +    /**
   1.138 +     * Given a stand-in character, return the UnicodeReplacer that it
   1.139 +     * represents, or NULL if it doesn't represent anything or if it
   1.140 +     * represents something that is not a replacer.
   1.141 +     * @param standIn    the given stand-in character.
   1.142 +     * @return           the UnicodeReplacer that 'standIn' represents
   1.143 +     */
   1.144 +    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
   1.145 +
   1.146 +
   1.147 +private:
   1.148 +    TransliterationRuleData &operator=(const TransliterationRuleData &other); // private to forbid assignment; note that the copy constructor above is public
   1.149 +};
   1.150 +
   1.151 +U_NAMESPACE_END
   1.152 +
   1.153 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
   1.154 +
   1.155 +#endif

mercurial