|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 1999-2010, International Business Machines Corporation and others. |
|
4 * All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * Date Name Description |
|
7 * 11/17/99 aliu Creation. |
|
8 ********************************************************************** |
|
9 */ |
|
10 #ifndef UNIFILT_H |
|
11 #define UNIFILT_H |
|
12 |
|
13 #include "unicode/unifunct.h" |
|
14 #include "unicode/unimatch.h" |
|
15 |
|
16 /** |
|
17 * \file |
|
18 * \brief C++ API: Unicode Filter |
|
19 */ |
|
20 |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
/**
 * Character value that stands in for text positions lying outside a
 * range.  The transliterator, for example, uses U_ETHER to represent
 * characters outside the range contextStart..contextLimit-1, which
 * lets rules and UnicodeSets explicitly match text beyond a defined
 * range.
 * @stable ICU 3.0
 */
#define U_ETHER (static_cast<UChar>(0xFFFF))
|
32 |
|
33 /** |
|
34 * |
|
35 * <code>UnicodeFilter</code> defines a protocol for selecting a |
|
36 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. |
|
37 * Currently, filters are used in conjunction with classes like {@link |
|
38 * Transliterator} to only process selected characters through a |
|
39 * transformation. |
|
40 * |
|
41 * <p>Note: UnicodeFilter currently stubs out two pure virtual methods |
|
42 * of its base class, UnicodeMatcher. These methods are toPattern() |
|
43 * and matchesIndexValue(). This is done so that filter classes that |
|
44 * are not actually used as matchers -- specifically, those in the |
|
45 * UnicodeFilterLogic component, and those in tests -- can continue to |
|
46 * work without defining these methods. As long as a filter is not |
|
47 * used in an RBT during real transliteration, these methods will not |
|
48 * be called. However, this breaks the UnicodeMatcher base class |
|
49 * protocol, and it is not a correct solution. |
|
50 * |
|
51 * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter |
|
52 * hierarchy and either redesign it, or simply remove the stubs in |
|
53 * UnicodeFilter and force subclasses to implement the full |
|
54 * UnicodeMatcher protocol. |
|
55 * |
|
56 * @see UnicodeFilterLogic |
|
57 * @stable ICU 2.0 |
|
58 */ |
|
59 class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { |
|
60 |
|
61 public: |
|
62 /** |
|
63 * Destructor |
|
64 * @stable ICU 2.0 |
|
65 */ |
|
66 virtual ~UnicodeFilter(); |
|
67 |
|
68 /** |
|
69 * Returns <tt>true</tt> for characters that are in the selected |
|
70 * subset. In other words, if a character is <b>to be |
|
71 * filtered</b>, then <tt>contains()</tt> returns |
|
72 * <b><tt>false</tt></b>. |
|
73 * @stable ICU 2.0 |
|
74 */ |
|
75 virtual UBool contains(UChar32 c) const = 0; |
|
76 |
|
77 /** |
|
78 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer |
|
79 * and return the pointer. |
|
80 * @stable ICU 2.4 |
|
81 */ |
|
82 virtual UnicodeMatcher* toMatcher() const; |
|
83 |
|
84 /** |
|
85 * Implement UnicodeMatcher API. |
|
86 * @stable ICU 2.4 |
|
87 */ |
|
88 virtual UMatchDegree matches(const Replaceable& text, |
|
89 int32_t& offset, |
|
90 int32_t limit, |
|
91 UBool incremental); |
|
92 |
|
93 /** |
|
94 * UnicodeFunctor API. Nothing to do. |
|
95 * @stable ICU 2.4 |
|
96 */ |
|
97 virtual void setData(const TransliterationRuleData*); |
|
98 |
|
99 /** |
|
100 * ICU "poor man's RTTI", returns a UClassID for this class. |
|
101 * |
|
102 * @stable ICU 2.2 |
|
103 */ |
|
104 static UClassID U_EXPORT2 getStaticClassID(); |
|
105 |
|
106 protected: |
|
107 |
|
108 /* |
|
109 * Since this class has pure virtual functions, |
|
110 * a constructor can't be used. |
|
111 * @stable ICU 2.0 |
|
112 */ |
|
113 /* UnicodeFilter();*/ |
|
114 }; |
|
115 |
|
116 /*inline UnicodeFilter::UnicodeFilter() {}*/ |
|
117 |
|
118 U_NAMESPACE_END |
|
119 |
|
120 #endif |