The Tor Browser: intl/icu/source/i18n/strmatch.h@6474c204b198

Cloned from upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
(revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f) for hacking purposes.

     1 /*

     2  * Copyright (C) 2001-2011, International Business Machines Corporation

     3  * and others. All Rights Reserved.

     4  **********************************************************************

     5  *   Date        Name        Description

     6  *   07/23/01    aliu        Creation.

     7  **********************************************************************

     8  */

     9 #ifndef STRMATCH_H

    10 #define STRMATCH_H

    12 #include "unicode/utypes.h"

    14 #if !UCONFIG_NO_TRANSLITERATION

    16 #include "unicode/unistr.h"

    17 #include "unicode/unifunct.h"

    18 #include "unicode/unimatch.h"

    19 #include "unicode/unirepl.h"

    21 U_NAMESPACE_BEGIN

    23 class TransliterationRuleData;

    25 /**

    26  * An object that matches a fixed input string, implementing the

    27  * UnicodeMatcher API.  This object also implements the

    28  * UnicodeReplacer API, allowing it to emit the matched text as

    29  * output.  Since the match text may contain flexible match elements,

    30  * such as UnicodeSets, the emitted text is not the match pattern, but

    31  * instead a substring of the actual matched text.  Following

    32  * convention, the output text is the leftmost match seen up to this

    33  * point.

    34  *

    35  * A StringMatcher may represent a segment, in which case it has a

    36  * positive segment number.  This affects how the matcher converts

    37  * itself to a pattern but does not otherwise affect its function.

    38  *

    39  * A StringMatcher that is not a segment should not be used as a

    40  * UnicodeReplacer.

    41  */

    42 class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {

    44  public:

    46     /**

    47      * Construct a matcher that matches the given pattern string.

    48      * @param string the pattern to be matched, possibly containing

    49      * stand-ins that represent nested UnicodeMatcher objects.

    50      * @param start inclusive start index of text to be replaced

    51      * @param limit exclusive end index of text to be replaced;

    52      * must be greater than or equal to start (NOTE(review): wording matches replace(); confirm these index into 'string')

    53      * @param segmentNum the segment number from 1..n, or 0 if this is

    54      * not a segment.

    55      * @param data context object mapping stand-ins to

    56      * UnicodeMatcher objects.

    57      */

    58     StringMatcher(const UnicodeString& string,

    59                   int32_t start,

    60                   int32_t limit,

    61                   int32_t segmentNum,

    62                   const TransliterationRuleData& data);

    64     /**

    65      * Copy constructor.

    66      * @param o  the object to be copied.

    67      */

    68     StringMatcher(const StringMatcher& o);

    70     /**

    71      * Destructor.

    72      */

    73     virtual ~StringMatcher();

    75     /**

    76      * Implement UnicodeFunctor.

    77      * @return a copy of the object.

    78      */

    79     virtual UnicodeFunctor* clone() const;

    81     /**

    82      * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer

    83      * and return the pointer.

    84      * @return the UnicodeMatcher pointer.

    85      */

    86     virtual UnicodeMatcher* toMatcher() const;

    88     /**

    89      * UnicodeFunctor API.  Cast 'this' to a UnicodeReplacer* pointer

    90      * and return the pointer.

    91      * @return the UnicodeReplacer pointer.

    92      */

    93     virtual UnicodeReplacer* toReplacer() const;

    95     /**

    96      * Implement UnicodeMatcher.

    97      * @param text the text to be matched

    98      * @param offset on input, the index into text at which to begin

    99      * matching.  On output, the limit of the matched text.  The

   100      * number of matched characters is the output value of offset

   101      * minus the input value.  Offset should always point to the

   102      * HIGH SURROGATE (leading code unit) of a pair of surrogates,

   103      * both on entry and upon return.

   104      * @param limit the limit index of text to be matched.  Greater

   105      * than offset for a forward direction match, less than offset for

   106      * a backward direction match.  The last character to be

   107      * considered for matching will be text.charAt(limit-1) in the

   108      * forward direction or text.charAt(limit+1) in the backward

   109      * direction.

   110      * @param incremental  if TRUE, then assume further characters may

   111      * be inserted at limit and check for partial matching.  Otherwise

   112      * assume the text as given is complete.

   113      * @return a match degree value indicating a full match, a partial

   114      * match, or a mismatch.  If incremental is FALSE then

   115      * U_PARTIAL_MATCH should never be returned.

   116      */

   117     virtual UMatchDegree matches(const Replaceable& text,

   118                                  int32_t& offset,

   119                                  int32_t limit,

   120                                  UBool incremental);

   122     /**

   123      * Implement UnicodeMatcher.

   124      * @param result            Output param to receive the pattern.

   125      * @param escapeUnprintable if TRUE, escape unprintable characters.

   126      * @return                  A reference to 'result'.

   127      */

   128     virtual UnicodeString& toPattern(UnicodeString& result,

   129                                      UBool escapeUnprintable = FALSE) const;

   131     /**

   132      * Implement UnicodeMatcher.

   133      * Returns TRUE if this matcher will match a character c, where c

   134      * & 0xFF == v, at offset, in the forward direction (with limit >

   135      * offset).  This is used by <tt>RuleBasedTransliterator</tt> for

   136      * indexing.

   137      * @param v    the given value

   138      * @return     TRUE if this matcher will match a character c,

   139      *             where c & 0xFF == v

   140      */

   141     virtual UBool matchesIndexValue(uint8_t v) const;

   143     /**

   144      * Implement UnicodeMatcher.  NOTE(review): presumably unions the characters this matcher may match into toUnionTo -- confirm.

   145      */

   146     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;

   148     /**

   149      * Implement UnicodeFunctor.  NOTE(review): presumably installs the given rule data as the 'data' context -- confirm.

   150      */

   151     virtual void setData(const TransliterationRuleData*);

   153     /**

   154      * Replace characters in 'text' from 'start' to 'limit' with the

   155      * output text of this object.  Update the 'cursor' parameter to

   156      * give the cursor position and return the length of the

   157      * replacement text.

   158      *

   159      * @param text the text in which characters are to be replaced

   160      * @param start inclusive start index of text to be replaced

   161      * @param limit exclusive end index of text to be replaced;

   162      * must be greater than or equal to start

   163      * @param cursor output parameter for the cursor position.

   164      * Not all replacer objects will update this, but in a complete

   165      * tree of replacer objects, representing the entire output side

   166      * of a transliteration rule, at least one must update it.

   167      * @return the number of 16-bit code units in the text replacing

   168      * the characters at offsets start..(limit-1) in text

   169      */

   170     virtual int32_t replace(Replaceable& text,

   171                             int32_t start,

   172                             int32_t limit,

   173                             int32_t& cursor);

   175     /**

   176      * Returns a string representation of this replacer.  If the

   177      * result of calling this function is passed to the appropriate

   178      * parser, typically TransliteratorParser, it will produce another

   179      * replacer that is equal to this one.

   180      * @param result the string to receive the pattern.  Previous

   181      * contents will be deleted.

   182      * @param escapeUnprintable if TRUE then convert unprintable

   183      * characters to their hex escape representations, \\uxxxx or

   184      * \\Uxxxxxxxx.  Unprintable characters are defined by

   185      * Utility.isUnprintable().

   186      * @return a reference to 'result'.

   187      */

   188     virtual UnicodeString& toReplacerPattern(UnicodeString& result,

   189                                              UBool escapeUnprintable) const;

   191     /**

   192      * Remove any match data.  This must be called before performing a

   193      * set of matches with this segment.

   194      */

   195     void resetMatch();

   197     /**

   198      * ICU "poor man's RTTI", returns a UClassID for the actual class.

   199      */

   200     virtual UClassID getDynamicClassID() const;

   202     /**

   203      * ICU "poor man's RTTI", returns a UClassID for this class.

   204      */

   205     static UClassID U_EXPORT2 getStaticClassID();

   207     /**

   208      * Union the set of all characters that may be output by this object

   209      * into the given set.

   210      * @param toUnionTo the set into which to union the output characters

   211      */

   212     virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;

   214  private:

   216     /**

   217      * The text to be matched.

   218      */

   219     UnicodeString pattern;

   221     /**

   222      * Context object that maps stand-ins to matcher and replacer

   223      * objects.

   224      */

   225     const TransliterationRuleData* data;

   227     /**

   228      * The segment number, 1-based, or 0 if not a segment.

   229      */

   230     int32_t segmentNumber;

   232     /**

   233      * Start offset, in the match text, of the <em>rightmost</em>

   234      * match.  NOTE(review): the class comment says the emitted text is the "leftmost" match -- confirm which is intended.

   235      */

   236     int32_t matchStart;

   238     /**

   239      * Limit offset, in the match text, of the <em>rightmost</em>

   240      * match.

   241      */

   242     int32_t matchLimit;

   244 };

   246 U_NAMESPACE_END

   248 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

   250 #endif

The Tor Browser / file revision

intl/icu/source/i18n/strmatch.h@6474c204b198

intl/icu/source/i18n/strmatch.h