1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/rep.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,261 @@ 1.4 +/* 1.5 +************************************************************************** 1.6 +* Copyright (C) 1999-2012, International Business Machines Corporation and 1.7 +* others. All Rights Reserved. 1.8 +************************************************************************** 1.9 +* Date Name Description 1.10 +* 11/17/99 aliu Creation. Ported from java. Modified to 1.11 +* match current UnicodeString API. Forced 1.12 +* to use name "handleReplaceBetween" because 1.13 +* of existing methods in UnicodeString. 1.14 +************************************************************************** 1.15 +*/ 1.16 + 1.17 +#ifndef REP_H 1.18 +#define REP_H 1.19 + 1.20 +#include "unicode/uobject.h" 1.21 + 1.22 +/** 1.23 + * \file 1.24 + * \brief C++ API: Replaceable String 1.25 + */ 1.26 + 1.27 +U_NAMESPACE_BEGIN 1.28 + 1.29 +class UnicodeString; 1.30 + 1.31 +/** 1.32 + * <code>Replaceable</code> is an abstract base class representing a 1.33 + * string of characters that supports the replacement of a range of 1.34 + * itself with a new string of characters. It is used by APIs that 1.35 + * change a piece of text while retaining metadata. Metadata is data 1.36 + * other than the Unicode characters returned by char32At(). One 1.37 + * example of metadata is style attributes; another is an edit 1.38 + * history, marking each character with an author and revision number. 1.39 + * 1.40 + * <p>An implicit aspect of the <code>Replaceable</code> API is that 1.41 + * during a replace operation, new characters take on the metadata of 1.42 + * the old characters. For example, if the string "the <b>bold</b> 1.43 + * font" has range (4, 8) replaced with "strong", then it becomes "the 1.44 + * <b>strong</b> font". 1.45 + * 1.46 + * <p><code>Replaceable</code> specifies ranges using a start 1.47 + * offset and a limit offset. 
The range of characters thus specified 1.48 + * includes the characters at offset start..limit-1. That is, the 1.49 + * start offset is inclusive, and the limit offset is exclusive. 1.50 + * 1.51 + * <p><code>Replaceable</code> also includes API to access characters 1.52 + * in the string: <code>length()</code>, <code>charAt()</code>, 1.53 + * <code>char32At()</code>, and <code>extractBetween()</code>. 1.54 + * 1.55 + * <p>For a subclass to support metadata, typical behavior of 1.56 + * <code>handleReplaceBetween()</code> is the following: 1.57 + * <ul> 1.58 + * <li>Set the metadata of the new text to the metadata of the first 1.59 + * character replaced</li> 1.60 + * <li>If no characters are replaced, use the metadata of the 1.61 + * previous character</li> 1.62 + * <li>If there is no previous character (i.e. start == 0), use the 1.63 + * following character</li> 1.64 + * <li>If there is no following character (i.e. the replaceable was 1.65 + * empty), use default metadata.</li> 1.66 + * <li>If the code point U+FFFF is seen, it should be interpreted as 1.67 + * a special marker having no metadata 1.68 + * </li> 1.69 + * </ul> 1.70 + * If this is not the behavior, the subclass should document any differences. 1.71 + * @author Alan Liu 1.72 + * @stable ICU 2.0 1.73 + */ 1.74 +class U_COMMON_API Replaceable : public UObject { 1.75 + 1.76 +public: 1.77 + /** 1.78 + * Destructor. 1.79 + * @stable ICU 2.0 1.80 + */ 1.81 + virtual ~Replaceable(); 1.82 + 1.83 + /** 1.84 + * Returns the number of 16-bit code units in the text. 1.85 + * @return number of 16-bit code units in text 1.86 + * @stable ICU 1.8 1.87 + */ 1.88 + inline int32_t length() const; 1.89 + 1.90 + /** 1.91 + * Returns the 16-bit code unit at the given offset into the text. 
1.92 + * @param offset an integer between 0 and <code>length()</code>-1 1.93 + * inclusive 1.94 + * @return 16-bit code unit of text at given offset 1.95 + * @stable ICU 1.8 1.96 + */ 1.97 + inline UChar charAt(int32_t offset) const; 1.98 + 1.99 + /** 1.100 + * Returns the 32-bit code point at the given 16-bit offset into 1.101 + * the text. This assumes the text is stored as 16-bit code units 1.102 + * with surrogate pairs intermixed. If the offset of a leading or 1.103 + * trailing code unit of a surrogate pair is given, return the 1.104 + * code point of the surrogate pair. 1.105 + * 1.106 + * @param offset an integer between 0 and <code>length()</code>-1 1.107 + * inclusive 1.108 + * @return 32-bit code point of text at given offset 1.109 + * @stable ICU 1.8 1.110 + */ 1.111 + inline UChar32 char32At(int32_t offset) const; 1.112 + 1.113 + /** 1.114 + * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) 1.115 + * into the UnicodeString <tt>target</tt>. 1.116 + * @param start offset of first character which will be copied 1.117 + * @param limit offset immediately following the last character to 1.118 + * be copied 1.119 + * @param target UnicodeString into which to copy characters. 1.120 + * The result is delivered through <TT>target</TT>; nothing is returned. 1.121 + * @stable ICU 2.1 1.122 + */ 1.123 + virtual void extractBetween(int32_t start, 1.124 + int32_t limit, 1.125 + UnicodeString& target) const = 0; 1.126 + 1.127 + /** 1.128 + * Replaces a substring of this object with the given text. If the 1.129 + * characters being replaced have metadata, the new characters 1.130 + * that replace them should be given the same metadata. 1.131 + * 1.132 + * <p>Subclasses must ensure that if the text between start and 1.133 + * limit is equal to the replacement text, that replace has no 1.134 + * effect. That is, any metadata 1.135 + * should be unaffected. 
In addition, subclasses are encouraged to 1.136 + * check for initial and trailing identical characters, and make a 1.137 + * smaller replacement if possible. This will preserve as much 1.138 + * metadata as possible. 1.139 + * @param start the beginning index, inclusive; <code>0 <= start 1.140 + * <= limit</code>. 1.141 + * @param limit the ending index, exclusive; <code>start <= limit 1.142 + * <= length()</code>. 1.143 + * @param text the text to replace characters <code>start</code> 1.144 + * to <code>limit - 1</code> 1.145 + * @stable ICU 2.0 1.146 + */ 1.147 + virtual void handleReplaceBetween(int32_t start, 1.148 + int32_t limit, 1.149 + const UnicodeString& text) = 0; 1.150 + // Note: All other methods in this class take the names of 1.151 + // existing UnicodeString methods. This method is the exception. 1.152 + // It is named differently because all replace methods of 1.153 + // UnicodeString return a UnicodeString&. The 'between' is 1.154 + // required in order to conform to the UnicodeString naming 1.155 + // convention; API taking start/length are named <operation>, and 1.156 + // those taking start/limit are named <operationBetween>. The 1.157 + // 'handle' is added because 'replaceBetween' and 1.158 + // 'doReplaceBetween' are already taken. 1.159 + 1.160 + /** 1.161 + * Copies a substring of this object, retaining metadata. 1.162 + * This method is used to duplicate or reorder substrings. 1.163 + * The destination index must not overlap the source range. 1.164 + * 1.165 + * @param start the beginning index, inclusive; <code>0 <= start <= 1.166 + * limit</code>. 1.167 + * @param limit the ending index, exclusive; <code>start <= limit <= 1.168 + * length()</code>. 1.169 + * @param dest the destination index. The characters from 1.170 + * <code>start..limit-1</code> will be copied to <code>dest</code>. 1.171 + * Implementations of this method may assume that <code>dest <= start || 1.172 + * dest >= limit</code>. 
1.173 + * @stable ICU 2.0 1.174 + */ 1.175 + virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; 1.176 + 1.177 + /** 1.178 + * Returns true if this object contains metadata. If a 1.179 + * Replaceable object has metadata, calls to the Replaceable API 1.180 + * must be made so as to preserve metadata. If it does not, calls 1.181 + * to the Replaceable API may be optimized to improve performance. 1.182 + * The default implementation returns true. 1.183 + * @return true if this object contains metadata 1.184 + * @stable ICU 2.2 1.185 + */ 1.186 + virtual UBool hasMetaData() const; 1.187 + 1.188 + /** 1.189 + * Clone this object, an instance of a subclass of Replaceable. 1.190 + * Clones can be used concurrently in multiple threads. 1.191 + * If a subclass does not implement clone(), or if an error occurs, 1.192 + * then NULL is returned. 1.193 + * The clone functions in all subclasses return a pointer to a Replaceable 1.194 + * because some compilers do not support covariant (same-as-this) 1.195 + * return types; cast to the appropriate subclass if necessary. 1.196 + * The caller must delete the clone. 1.197 + * 1.198 + * @return a clone of this object, or NULL if cloning is unsupported or fails 1.199 + * 1.200 + * @see getDynamicClassID 1.201 + * @stable ICU 2.6 1.202 + */ 1.203 + virtual Replaceable *clone() const; 1.204 + 1.205 +protected: 1.206 + 1.207 + /** 1.208 + * Default constructor. 1.209 + * @stable ICU 2.4 1.210 + */ 1.211 + inline Replaceable(); 1.212 + 1.213 + /* 1.214 + * Assignment operator not declared. The compiler will provide one 1.215 + * which does nothing since this class does not contain any data members. 1.216 + * API/code coverage may show the assignment operator as present and 1.217 + * untested - ignore. 1.218 + * Subclasses need this assignment operator if they use compiler-provided 1.219 + * assignment operators of their own. An alternative to not declaring one 1.220 + * here would be to declare and empty-implement a protected or public one. 
1.221 + Replaceable &Replaceable::operator=(const Replaceable &); 1.222 + */ 1.223 + 1.224 + /** 1.225 + * Virtual version of length(); the inline length() wrapper forwards to this method. 1.226 + * @stable ICU 2.4 1.227 + */ 1.228 + virtual int32_t getLength() const = 0; 1.229 + 1.230 + /** 1.231 + * Virtual version of charAt(); the inline charAt() wrapper forwards to this method. 1.232 + * @stable ICU 2.4 1.233 + */ 1.234 + virtual UChar getCharAt(int32_t offset) const = 0; 1.235 + 1.236 + /** 1.237 + * Virtual version of char32At(); the inline char32At() wrapper forwards to this method. 1.238 + * @stable ICU 2.4 1.239 + */ 1.240 + virtual UChar32 getChar32At(int32_t offset) const = 0; 1.241 +}; 1.242 + 1.243 +inline Replaceable::Replaceable() {} 1.244 + 1.245 +inline int32_t 1.246 +Replaceable::length() const { 1.247 + return getLength(); 1.248 +} 1.249 + 1.250 +inline UChar 1.251 +Replaceable::charAt(int32_t offset) const { 1.252 + return getCharAt(offset); 1.253 +} 1.254 + 1.255 +inline UChar32 1.256 +Replaceable::char32At(int32_t offset) const { 1.257 + return getChar32At(offset); 1.258 +} 1.259 + 1.260 +// There is no rep.cpp, see unistr.cpp for Replaceable function implementations. 1.261 + 1.262 +U_NAMESPACE_END 1.263 + 1.264 +#endif