intl/icu/source/common/unicode/unimatch.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/unimatch.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,163 @@
     1.4 +/*
     1.5 +* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
     1.6 +**********************************************************************
     1.7 +*   Date        Name        Description
     1.8 +*   07/18/01    aliu        Creation.
     1.9 +**********************************************************************
    1.10 +*/
    1.11 +#ifndef UNIMATCH_H
    1.12 +#define UNIMATCH_H
    1.13 +
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +/**
    1.17 + * \file 
    1.18 + * \brief C++ API: Unicode Matcher
    1.19 + */
    1.20 +
    1.21 +
    1.22 +U_NAMESPACE_BEGIN
    1.23 +
    1.24 +class Replaceable;
    1.25 +class UnicodeString;
    1.26 +class UnicodeSet;
    1.27 +
    1.28 +/**
    1.29 + * Constants returned by <code>UnicodeMatcher::matches()</code>
    1.30 + * indicating the degree of match.
    1.31 + * @stable ICU 2.4
    1.32 + */
    1.33 +enum UMatchDegree {
    1.34 +    /**
    1.35 +     * Constant (numeric value 0) returned by <code>matches()</code>
    1.36 +     * indicating a mismatch between the text and this matcher.  The
    1.37 +     * text contains a character which does not match, or the text does
    1.38 +     * not contain all desired characters for a non-incremental match.
    1.39 +     * @stable ICU 2.4
    1.40 +     */
    1.41 +    U_MISMATCH,
    1.42 +    
    1.43 +    /**
    1.44 +     * Constant (numeric value 1) returned by <code>matches()</code>
    1.45 +     * indicating a partial match between the text and this matcher.
    1.46 +     * This value is only returned for incremental match operations.
    1.47 +     * All characters of the text match, but more characters are
    1.48 +     * required for a complete match.  Alternatively, for
    1.49 +     * variable-length matchers, all characters of the text match, and
    1.50 +     * if more characters were supplied at limit, they might also match.
    1.51 +     * @stable ICU 2.4
    1.52 +     */
    1.53 +    U_PARTIAL_MATCH,
    1.54 +    
    1.55 +    /**
    1.56 +     * Constant (numeric value 2) returned by <code>matches()</code>
    1.57 +     * indicating a complete match between the text and this matcher.
    1.58 +     * For an incremental variable-length match, this value is returned
    1.59 +     * if the given text matches, and it is known that additional
    1.60 +     * characters would not alter the extent of the match.
    1.61 +     * @stable ICU 2.4
    1.62 +     */
    1.63 +    U_MATCH
    1.64 +};
    1.65 +
    1.66 +/**
    1.67 + * <code>UnicodeMatcher</code> defines a protocol for objects that can
    1.68 + * match a range of characters in a Replaceable string.
    1.69 + * @stable ICU 2.4
    1.70 + */
    1.71 +class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
    1.72 +
    1.73 +public:
    1.74 +    /**
    1.75 +     * Destructor.
    1.76 +     * @stable ICU 2.4
    1.77 +     */
    1.78 +    virtual ~UnicodeMatcher();
    1.79 +
    1.80 +    /**
    1.81 +     * Return a UMatchDegree value indicating the degree of match for
    1.82 +     * the given text at the given offset.  Zero, one, or more
    1.83 +     * characters may be matched.
    1.84 +     *
    1.85 +     * Matching in the forward direction is indicated by limit >
    1.86 +     * offset.  Characters from offset forwards to limit-1 will be
    1.87 +     * considered for matching.
    1.88 +     * 
    1.89 +     * Matching in the reverse direction is indicated by limit <
    1.90 +     * offset.  Characters from offset backwards to limit+1 will be
    1.91 +     * considered for matching.
    1.92 +     *
    1.93 +     * If limit == offset then the only match possible is a zero
    1.94 +     * character match (which subclasses may implement if desired).
    1.95 +     *
    1.96 +     * As a side effect, advance the offset parameter to the limit of
    1.97 +     * the matched substring.  In the forward direction, this will be
    1.98 +     * the index of the last matched character plus one.  In the
    1.99 +     * reverse direction, this will be the index of the last matched
   1.100 +     * character minus one.
   1.101 +     *
   1.102 +     * <p>Note:  This method is not const because some classes may
   1.103 +     * modify their state as the result of a match.
   1.104 +     *
   1.105 +     * @param text the text to be matched
   1.106 +     * @param offset on input, the index into text at which to begin
   1.107 +     * matching.  On output, the limit of the matched text.  The
   1.108 +     * number of matched characters is the output value of offset
   1.109 +     * minus the input value.  Offset should always point to the
   1.110 +     * HIGH SURROGATE (leading code unit) of a pair of surrogates,
   1.111 +     * both on entry and upon return.
   1.112 +     * @param limit the limit index of text to be matched.  Greater
   1.113 +     * than offset for a forward direction match, less than offset for
   1.114 +     * a backward direction match.  The last character to be
   1.115 +     * considered for matching will be text.charAt(limit-1) in the
   1.116 +     * forward direction or text.charAt(limit+1) in the backward
   1.117 +     * direction.
   1.118 +     * @param incremental if TRUE, then assume further characters may
   1.119 +     * be inserted at limit and check for partial matching.  Otherwise
   1.120 +     * assume the text as given is complete.
   1.121 +     * @return a match degree value indicating a full match, a partial
   1.122 +     * match, or a mismatch.  If incremental is FALSE then
   1.123 +     * U_PARTIAL_MATCH should never be returned.
   1.124 +     * @stable ICU 2.4
   1.125 +     */
   1.126 +    virtual UMatchDegree matches(const Replaceable& text,
   1.127 +                                 int32_t& offset,
   1.128 +                                 int32_t limit,
   1.129 +                                 UBool incremental) = 0;
   1.130 +
   1.131 +    /**
   1.132 +     * Returns a string representation of this matcher.  If the result of
   1.133 +     * calling this function is passed to the appropriate parser, it
   1.134 +     * will produce another matcher that is equal to this one.
   1.135 +     * @param result the string to receive the pattern.  Previous
   1.136 +     * contents will be deleted.
   1.137 +     * @param escapeUnprintable if TRUE then convert unprintable
   1.138 +     * characters to their hex escape representations, \\uxxxx or
   1.139 +     * \\Uxxxxxxxx.  Unprintable characters are those other than
   1.140 +     * U+000A, U+0020..U+007E.
   1.141 +     * @stable ICU 2.4
   1.142 +     */
   1.143 +    virtual UnicodeString& toPattern(UnicodeString& result,
   1.144 +                                     UBool escapeUnprintable = FALSE) const = 0;
   1.145 +
   1.146 +    /**
   1.147 +     * Returns TRUE if this matcher will match a character c, where (c & 0xFF) == v, at
   1.148 +     * offset, in the forward direction (with limit > offset).  This is used by
   1.149 +     * <tt>RuleBasedTransliterator</tt> for indexing.
   1.150 +     * @param v the low byte, (c & 0xFF), of a candidate character c
   1.151 +     * @stable ICU 2.4
   1.152 +     */
   1.153 +    virtual UBool matchesIndexValue(uint8_t v) const = 0;
   1.154 +
   1.155 +    /**
   1.156 +     * Union the set of all characters that may be matched by this object
   1.157 +     * into the given set.
   1.158 +     * @param toUnionTo the set into which to union the source characters
   1.159 +     * @stable ICU 2.4
   1.160 +     */
   1.161 +    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
   1.162 +};
   1.163 +
   1.164 +U_NAMESPACE_END
   1.165 +
   1.166 +#endif

mercurial