intl/icu/source/common/unicode/unimatch.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
michael@0 3 **********************************************************************
michael@0 4 * Date Name Description
michael@0 5 * 07/18/01 aliu Creation.
michael@0 6 **********************************************************************
michael@0 7 */
michael@0 8 #ifndef UNIMATCH_H
michael@0 9 #define UNIMATCH_H
michael@0 10
michael@0 11 #include "unicode/utypes.h"
michael@0 12
michael@0 13 /**
michael@0 14 * \file
michael@0 15 * \brief C++ API: Unicode Matcher
michael@0 16 */
michael@0 17
michael@0 18
michael@0 19 U_NAMESPACE_BEGIN
michael@0 20
michael@0 21 class Replaceable;
michael@0 22 class UnicodeString;
michael@0 23 class UnicodeSet;
michael@0 24
michael@0 25 /**
michael@0 26 * Constants returned by <code>UnicodeMatcher::matches()</code>
michael@0 27 * indicating the degree of match, declared in increasing order of match (implicit values 0, 1, 2).
michael@0 28 * @stable ICU 2.4
michael@0 29 */
michael@0 30 enum UMatchDegree {
michael@0 31 /**
michael@0 32 * Constant returned by <code>matches()</code> indicating a
michael@0 33 * mismatch between the text and this matcher. The text contains
michael@0 34 * a character which does not match, or the text does not contain
michael@0 35 * all desired characters for a non-incremental match.
michael@0 36 * @stable ICU 2.4
michael@0 37 */
michael@0 38 U_MISMATCH,
michael@0 39
michael@0 40 /**
michael@0 41 * Constant returned by <code>matches()</code> indicating a
michael@0 42 * partial match between the text and this matcher. This value is
michael@0 43 * only returned for incremental match operations. All characters
michael@0 44 * of the text match, but more characters are required for a
michael@0 45 * complete match. Alternatively, for variable-length matchers,
michael@0 46 * all characters of the text match, and if more characters were
michael@0 47 * supplied at limit, they might also match.
michael@0 48 * @stable ICU 2.4
michael@0 49 */
michael@0 50 U_PARTIAL_MATCH,
michael@0 51
michael@0 52 /**
michael@0 53 * Constant returned by <code>matches()</code> indicating a
michael@0 54 * complete match between the text and this matcher. For an
michael@0 55 * incremental variable-length match, this value is returned if
michael@0 56 * the given text matches, and it is known that additional
michael@0 57 * characters would not alter the extent of the match.
michael@0 58 * @stable ICU 2.4
michael@0 59 */
michael@0 60 U_MATCH
michael@0 61 };
michael@0 62
michael@0 63 /**
michael@0 64 * <code>UnicodeMatcher</code> defines a protocol for objects that can
michael@0 65 * match a range of characters in a Replaceable string. Every member function except the destructor is pure virtual.
michael@0 66 * @stable ICU 2.4
michael@0 67 */
michael@0 68 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
michael@0 69
michael@0 70 public:
michael@0 71 /**
michael@0 72 * Virtual destructor, so that implementations can be destroyed through a UnicodeMatcher pointer.
michael@0 73 * @stable ICU 2.4
michael@0 74 */
michael@0 75 virtual ~UnicodeMatcher();
michael@0 76
michael@0 77 /**
michael@0 78 * Return a UMatchDegree value indicating the degree of match for
michael@0 79 * the given text at the given offset. Zero, one, or more
michael@0 80 * characters may be matched.
michael@0 81 *
michael@0 82 * Matching in the forward direction is indicated by limit >
michael@0 83 * offset. Characters from offset forwards to limit-1 will be
michael@0 84 * considered for matching.
michael@0 85 *
michael@0 86 * Matching in the reverse direction is indicated by limit <
michael@0 87 * offset. Characters from offset backwards to limit+1 will be
michael@0 88 * considered for matching.
michael@0 89 *
michael@0 90 * If limit == offset then the only match possible is a
michael@0 91 * zero-character match (which subclasses may implement if desired).
michael@0 92 *
michael@0 93 * As a side effect, advance the offset parameter to the limit of
michael@0 94 * the matched substring. In the forward direction, this will be
michael@0 95 * the index of the last matched character plus one. In the
michael@0 96 * reverse direction, this will be the index of the last matched
michael@0 97 * character minus one.
michael@0 98 *
michael@0 99 * <p>Note: This method is not const because some classes may
michael@0 100 * modify their state as the result of a match.
michael@0 101 *
michael@0 102 * @param text the text to be matched
michael@0 103 * @param offset on input, the index into text at which to begin
michael@0 104 * matching. On output, the limit of the matched text. The
michael@0 105 * number of matched characters is the output value of offset
michael@0 106 * minus the input value (forward match), or the input value minus the output value (reverse match). Offset should always point to the
michael@0 107 * HIGH SURROGATE (leading code unit) of a pair of surrogates,
michael@0 108 * both on entry and upon return.
michael@0 109 * @param limit the limit index of text to be matched. Greater
michael@0 110 * than offset for a forward direction match, less than offset for
michael@0 111 * a backward direction match. The last character to be
michael@0 112 * considered for matching will be text.charAt(limit-1) in the
michael@0 113 * forward direction or text.charAt(limit+1) in the backward
michael@0 114 * direction.
michael@0 115 * @param incremental if TRUE, then assume further characters may
michael@0 116 * be inserted at limit and check for partial matching. Otherwise
michael@0 117 * assume the text as given is complete.
michael@0 118 * @return a match degree value indicating a full match, a partial
michael@0 119 * match, or a mismatch. If incremental is FALSE then
michael@0 120 * U_PARTIAL_MATCH should never be returned.
michael@0 121 * @stable ICU 2.4
michael@0 122 */
michael@0 123 virtual UMatchDegree matches(const Replaceable& text,
michael@0 124 int32_t& offset,
michael@0 125 int32_t limit,
michael@0 126 UBool incremental) = 0;
michael@0 127
michael@0 128 /**
michael@0 129 * Returns a string representation of this matcher. If the result of
michael@0 130 * calling this function is passed to the appropriate parser, it
michael@0 131 * will produce another matcher that is equal to this one.
michael@0 132 * @param result the string to receive the pattern. Previous
michael@0 133 * contents will be deleted.
michael@0 134 * @param escapeUnprintable if TRUE then convert unprintable
michael@0 135 * characters to their hex escape representations, \\uxxxx or
michael@0 136 * \\Uxxxxxxxx. Unprintable characters are those other than
michael@0 137 * U+000A, U+0020..U+007E.
michael@0 138 * @stable ICU 2.4
michael@0 139 */
michael@0 140 virtual UnicodeString& toPattern(UnicodeString& result,
michael@0 141 UBool escapeUnprintable = FALSE) const = 0;
michael@0 142
michael@0 143 /**
michael@0 144 * Returns TRUE if this matcher will match a character c, where
michael@0 145 * (c & 0xFF) == v, at offset, in the forward direction (with limit >
michael@0 146 * offset). This is used by <tt>RuleBasedTransliterator</tt> for
michael@0 147 * indexing.
michael@0 148 * @stable ICU 2.4
michael@0 149 */
michael@0 150 virtual UBool matchesIndexValue(uint8_t v) const = 0;
michael@0 151
michael@0 152 /**
michael@0 153 * Union the set of all characters that may be matched by this object
michael@0 154 * into the given set.
michael@0 155 * @param toUnionTo the set into which to union the characters this object may match
michael@0 156 * @stable ICU 2.4
michael@0 157 */
michael@0 158 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
michael@0 159 };
michael@0 160
michael@0 161 U_NAMESPACE_END
michael@0 162
michael@0 163 #endif

mercurial