|
1 /* |
|
2 ************************************************************************** |
|
3 * Copyright (C) 1999-2012, International Business Machines Corporation and |
|
4 * others. All Rights Reserved. |
|
5 ************************************************************************** |
|
6 * Date Name Description |
|
7 * 11/17/99 aliu Creation. Ported from java. Modified to |
|
8 * match current UnicodeString API. Forced |
|
9 * to use name "handleReplaceBetween" because |
|
10 * of existing methods in UnicodeString. |
|
11 ************************************************************************** |
|
12 */ |
|
13 |
|
14 #ifndef REP_H |
|
15 #define REP_H |
|
16 |
|
17 #include "unicode/uobject.h" |
|
18 |
|
19 /** |
|
20 * \file |
|
21 * \brief C++ API: Replaceable String |
|
22 */ |
|
23 |
|
24 U_NAMESPACE_BEGIN |
|
25 |
|
26 class UnicodeString; |
|
27 |
|
28 /** |
|
29 * <code>Replaceable</code> is an abstract base class representing a |
|
30 * string of characters that supports the replacement of a range of |
|
31 * itself with a new string of characters. It is used by APIs that |
|
32 * change a piece of text while retaining metadata. Metadata is data |
|
33 * other than the Unicode characters returned by char32At(). One |
|
34 * example of metadata is style attributes; another is an edit |
|
35 * history, marking each character with an author and revision number. |
|
36 * |
|
37 * <p>An implicit aspect of the <code>Replaceable</code> API is that |
|
38 * during a replace operation, new characters take on the metadata of |
|
39 * the old characters. For example, if the string "the <b>bold</b> |
|
40 * font" has range (4, 8) replaced with "strong", then it becomes "the |
|
41 * <b>strong</b> font". |
|
42 * |
|
43 * <p><code>Replaceable</code> specifies ranges using a start |
|
44 * offset and a limit offset. The range of characters thus specified |
|
45 * includes the characters at offset start..limit-1. That is, the |
|
46 * start offset is inclusive, and the limit offset is exclusive. |
|
47 * |
|
48 * <p><code>Replaceable</code> also includes API to access characters |
|
49 * in the string: <code>length()</code>, <code>charAt()</code>, |
|
50 * <code>char32At()</code>, and <code>extractBetween()</code>. |
|
51 * |
|
52 * <p>For a subclass to support metadata, typical behavior of |
|
53 * <code>replace()</code> is the following: |
|
54 * <ul> |
|
55 * <li>Set the metadata of the new text to the metadata of the first |
|
56 * character replaced</li> |
|
57 * <li>If no characters are replaced, use the metadata of the |
|
58 * previous character</li> |
|
59 * <li>If there is no previous character (i.e. start == 0), use the |
|
60 * following character</li> |
|
61 * <li>If there is no following character (i.e. the replaceable was |
|
62 * empty), use default metadata.<br> |
|
63 * <li>If the code point U+FFFF is seen, it should be interpreted as |
|
64 * a special marker having no metadata<li> |
|
65 * </li> |
|
66 * </ul> |
|
67 * If this is not the behavior, the subclass should document any differences. |
|
68 * @author Alan Liu |
|
69 * @stable ICU 2.0 |
|
70 */ |
|
71 class U_COMMON_API Replaceable : public UObject { |
|
72 |
|
73 public: |
|
74 /** |
|
75 * Destructor. |
|
76 * @stable ICU 2.0 |
|
77 */ |
|
78 virtual ~Replaceable(); |
|
79 |
|
80 /** |
|
81 * Returns the number of 16-bit code units in the text. |
|
82 * @return number of 16-bit code units in text |
|
83 * @stable ICU 1.8 |
|
84 */ |
|
85 inline int32_t length() const; |
|
86 |
|
87 /** |
|
88 * Returns the 16-bit code unit at the given offset into the text. |
|
89 * @param offset an integer between 0 and <code>length()</code>-1 |
|
90 * inclusive |
|
91 * @return 16-bit code unit of text at given offset |
|
92 * @stable ICU 1.8 |
|
93 */ |
|
94 inline UChar charAt(int32_t offset) const; |
|
95 |
|
96 /** |
|
97 * Returns the 32-bit code point at the given 16-bit offset into |
|
98 * the text. This assumes the text is stored as 16-bit code units |
|
99 * with surrogate pairs intermixed. If the offset of a leading or |
|
100 * trailing code unit of a surrogate pair is given, return the |
|
101 * code point of the surrogate pair. |
|
102 * |
|
103 * @param offset an integer between 0 and <code>length()</code>-1 |
|
104 * inclusive |
|
105 * @return 32-bit code point of text at given offset |
|
106 * @stable ICU 1.8 |
|
107 */ |
|
108 inline UChar32 char32At(int32_t offset) const; |
|
109 |
|
110 /** |
|
111 * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) |
|
112 * into the UnicodeString <tt>target</tt>. |
|
113 * @param start offset of first character which will be copied |
|
114 * @param limit offset immediately following the last character to |
|
115 * be copied |
|
116 * @param target UnicodeString into which to copy characters. |
|
117 * @return A reference to <TT>target</TT> |
|
118 * @stable ICU 2.1 |
|
119 */ |
|
120 virtual void extractBetween(int32_t start, |
|
121 int32_t limit, |
|
122 UnicodeString& target) const = 0; |
|
123 |
|
124 /** |
|
125 * Replaces a substring of this object with the given text. If the |
|
126 * characters being replaced have metadata, the new characters |
|
127 * that replace them should be given the same metadata. |
|
128 * |
|
129 * <p>Subclasses must ensure that if the text between start and |
|
130 * limit is equal to the replacement text, that replace has no |
|
131 * effect. That is, any metadata |
|
132 * should be unaffected. In addition, subclasses are encouraged to |
|
133 * check for initial and trailing identical characters, and make a |
|
134 * smaller replacement if possible. This will preserve as much |
|
135 * metadata as possible. |
|
136 * @param start the beginning index, inclusive; <code>0 <= start |
|
137 * <= limit</code>. |
|
138 * @param limit the ending index, exclusive; <code>start <= limit |
|
139 * <= length()</code>. |
|
140 * @param text the text to replace characters <code>start</code> |
|
141 * to <code>limit - 1</code> |
|
142 * @stable ICU 2.0 |
|
143 */ |
|
144 virtual void handleReplaceBetween(int32_t start, |
|
145 int32_t limit, |
|
146 const UnicodeString& text) = 0; |
|
147 // Note: All other methods in this class take the names of |
|
148 // existing UnicodeString methods. This method is the exception. |
|
149 // It is named differently because all replace methods of |
|
150 // UnicodeString return a UnicodeString&. The 'between' is |
|
151 // required in order to conform to the UnicodeString naming |
|
152 // convention; API taking start/length are named <operation>, and |
|
153 // those taking start/limit are named <operationBetween>. The |
|
154 // 'handle' is added because 'replaceBetween' and |
|
155 // 'doReplaceBetween' are already taken. |
|
156 |
|
157 /** |
|
158 * Copies a substring of this object, retaining metadata. |
|
159 * This method is used to duplicate or reorder substrings. |
|
160 * The destination index must not overlap the source range. |
|
161 * |
|
162 * @param start the beginning index, inclusive; <code>0 <= start <= |
|
163 * limit</code>. |
|
164 * @param limit the ending index, exclusive; <code>start <= limit <= |
|
165 * length()</code>. |
|
166 * @param dest the destination index. The characters from |
|
167 * <code>start..limit-1</code> will be copied to <code>dest</code>. |
|
168 * Implementations of this method may assume that <code>dest <= start || |
|
169 * dest >= limit</code>. |
|
170 * @stable ICU 2.0 |
|
171 */ |
|
172 virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; |
|
173 |
|
174 /** |
|
175 * Returns true if this object contains metadata. If a |
|
176 * Replaceable object has metadata, calls to the Replaceable API |
|
177 * must be made so as to preserve metadata. If it does not, calls |
|
178 * to the Replaceable API may be optimized to improve performance. |
|
179 * The default implementation returns true. |
|
180 * @return true if this object contains metadata |
|
181 * @stable ICU 2.2 |
|
182 */ |
|
183 virtual UBool hasMetaData() const; |
|
184 |
|
185 /** |
|
186 * Clone this object, an instance of a subclass of Replaceable. |
|
187 * Clones can be used concurrently in multiple threads. |
|
188 * If a subclass does not implement clone(), or if an error occurs, |
|
189 * then NULL is returned. |
|
190 * The clone functions in all subclasses return a pointer to a Replaceable |
|
191 * because some compilers do not support covariant (same-as-this) |
|
192 * return types; cast to the appropriate subclass if necessary. |
|
193 * The caller must delete the clone. |
|
194 * |
|
195 * @return a clone of this object |
|
196 * |
|
197 * @see getDynamicClassID |
|
198 * @stable ICU 2.6 |
|
199 */ |
|
200 virtual Replaceable *clone() const; |
|
201 |
|
202 protected: |
|
203 |
|
204 /** |
|
205 * Default constructor. |
|
206 * @stable ICU 2.4 |
|
207 */ |
|
208 inline Replaceable(); |
|
209 |
|
210 /* |
|
211 * Assignment operator not declared. The compiler will provide one |
|
212 * which does nothing since this class does not contain any data members. |
|
213 * API/code coverage may show the assignment operator as present and |
|
214 * untested - ignore. |
|
215 * Subclasses need this assignment operator if they use compiler-provided |
|
216 * assignment operators of their own. An alternative to not declaring one |
|
217 * here would be to declare and empty-implement a protected or public one. |
|
218 Replaceable &Replaceable::operator=(const Replaceable &); |
|
219 */ |
|
220 |
|
221 /** |
|
222 * Virtual version of length(). |
|
223 * @stable ICU 2.4 |
|
224 */ |
|
225 virtual int32_t getLength() const = 0; |
|
226 |
|
227 /** |
|
228 * Virtual version of charAt(). |
|
229 * @stable ICU 2.4 |
|
230 */ |
|
231 virtual UChar getCharAt(int32_t offset) const = 0; |
|
232 |
|
233 /** |
|
234 * Virtual version of char32At(). |
|
235 * @stable ICU 2.4 |
|
236 */ |
|
237 virtual UChar32 getChar32At(int32_t offset) const = 0; |
|
238 }; |
|
239 |
|
240 inline Replaceable::Replaceable() {} |
|
241 |
|
242 inline int32_t |
|
243 Replaceable::length() const { |
|
244 return getLength(); |
|
245 } |
|
246 |
|
247 inline UChar |
|
248 Replaceable::charAt(int32_t offset) const { |
|
249 return getCharAt(offset); |
|
250 } |
|
251 |
|
252 inline UChar32 |
|
253 Replaceable::char32At(int32_t offset) const { |
|
254 return getChar32At(offset); |
|
255 } |
|
256 |
|
257 // There is no rep.cpp, see unistr.cpp for Replaceable function implementations. |
|
258 |
|
259 U_NAMESPACE_END |
|
260 |
|
261 #endif |