Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 // IWYU pragma: private, include "nsString.h"
7 #ifndef nsReadableUtils_h___
8 #define nsReadableUtils_h___
10 /**
11 * I guess the routines in this file are all mis-named.
12 * According to our conventions, they should be |NS_xxx|.
13 */
15 #include "nsAString.h"
17 #include "nsTArrayForwardDeclare.h"
19 inline size_t Distance( const nsReadingIterator<char16_t>& start, const nsReadingIterator<char16_t>& end )
20 {
21 return end.get() - start.get();
22 }
23 inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end )
24 {
25 return end.get() - start.get();
26 }
28 void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest );
29 void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
31 void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest );
32 void CopyASCIItoUTF16( const char* aSource, nsAString& aDest );
34 void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
35 void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
37 void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest );
38 void CopyUTF8toUTF16( const char* aSource, nsAString& aDest );
40 void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest );
41 void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
42 bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest,
43 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;
45 void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest );
46 void AppendASCIItoUTF16( const char* aSource, nsAString& aDest );
48 void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
49 bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest,
50 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;
51 void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
52 bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest,
53 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;
55 void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest );
56 void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );
58 #ifdef MOZ_USE_CHAR16_WRAPPER
59 inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest )
60 {
61 return AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest);
62 }
63 #endif
65 /**
66 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
67 *
68 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
69 * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
70 * This conversion is not well defined; but it reproduces legacy string behavior.
71 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
72 *
73 * @param aSource a 16-bit wide string
74 * @return a new |char| buffer you must free with |nsMemory::Free|.
75 */
76 char* ToNewCString( const nsAString& aSource );
79 /**
80 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
81 *
82 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
83 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
84 *
85 * @param aSource an 8-bit wide string
86 * @return a new |char| buffer you must free with |nsMemory::Free|.
87 */
88 char* ToNewCString( const nsACString& aSource );
90 /**
91 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
92 *
93 * Allocates and returns a new |char| buffer which you must free with
94 * |nsMemory::Free|.
95 * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
96 * copying |aSource| to your new buffer.
97 * The new buffer is zero-terminated, but that may not help you if |aSource|
98 * contains embedded nulls.
99 *
100 * @param aSource a UTF-16 string (made of char16_t's)
101 * @param aUTF8Count the number of 8-bit units that was returned
102 * @return a new |char| buffer you must free with |nsMemory::Free|.
103 */
105 char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr );
108 /**
109 * Returns a new |char16_t| buffer containing a zero-terminated copy of
110 * |aSource|.
111 *
112 * Allocates and returns a new |char16_t| buffer which you must free with
113 * |nsMemory::Free|.
114 * The new buffer is zero-terminated, but that may not help you if |aSource|
115 * contains embedded nulls.
116 *
117 * @param aSource a UTF-16 string
118 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
119 */
120 char16_t* ToNewUnicode( const nsAString& aSource );
123 /**
124 * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
125 *
126 * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.
127 * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
128 * This conversion is not well defined; but it reproduces legacy string behavior.
129 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
130 *
131 * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
132 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
133 */
134 char16_t* ToNewUnicode( const nsACString& aSource );
136 /**
137 * Returns the required length for a char16_t buffer holding
138 * a copy of aSource, using UTF-8 to UTF-16 conversion.
139 * The length does NOT include any space for zero-termination.
140 *
141 * @param aSource an 8-bit wide string, UTF-8 encoded
142 * @return length of UTF-16 encoded string copy, not zero-terminated
143 */
144 uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource );
146 /**
147 * Copies the source string into the specified buffer, converting UTF-8 to
148 * UTF-16 in the process. The conversion is well defined for valid UTF-8
149 * strings.
150 * The copied string will be zero-terminated! Any embedded nulls will be
151 * copied nonetheless. It is the caller's responsibility to ensure the buffer
152 * is large enough to hold the string copy plus one char16_t for
153 * zero-termination!
154 *
155 * @see CalcUTF8ToUnicodeLength( const nsACString& )
156 * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
157 *
158 * @param aSource an 8-bit wide string, UTF-8 encoded
159 * @param aBuffer the buffer holding the converted string copy
160 * @param aUTF16Count receiving optionally the number of 16-bit units that
161 * were copied
162 * @return aBuffer pointer, for convenience
163 */
164 char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource,
165 char16_t *aBuffer,
166 uint32_t *aUTF16Count = nullptr );
168 /**
169 * Returns a new |char16_t| buffer containing a zero-terminated copy
170 * of |aSource|.
171 *
172 * Allocates and returns a new |char16_t| buffer which you must free with
173 * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
174 * while copying |aSource| to your new buffer. This conversion is well defined
175 * for a valid UTF-8 string. The new buffer is zero-terminated, but that
176 * may not help you if |aSource| contains embedded nulls.
177 *
178 * @param aSource an 8-bit wide string, UTF-8 encoded
179 * @param aUTF16Count the number of 16-bit units that was returned
180 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
181 * (UTF-16 encoded)
182 */
183 char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr );
185 /**
186 * Copies |aLength| 16-bit code units from the start of |aSource| to the
187 * |char16_t| buffer |aDest|.
188 *
189 * After this operation |aDest| is not null terminated.
190 *
191 * @param aSource a UTF-16 string
192 * @param aSrcOffset start offset in the source string
193 * @param aDest a |char16_t| buffer
194 * @param aLength the number of 16-bit code units to copy
195 * @return pointer to destination buffer - identical to |aDest|
196 */
197 char16_t* CopyUnicodeTo( const nsAString& aSource,
198 uint32_t aSrcOffset,
199 char16_t* aDest,
200 uint32_t aLength );
203 /**
204 * Copies 16-bit characters between iterators |aSrcStart| and
205 * |aSrcEnd| to the writable string |aDest|. Similar to the
206 * |nsString::Mid| method.
207 *
208 * After this operation |aDest| is not null terminated.
209 *
210 * @param aSrcStart start source iterator
211 * @param aSrcEnd end source iterator
212 * @param aDest destination for the copy
213 */
214 void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
215 const nsAString::const_iterator& aSrcEnd,
216 nsAString& aDest );
218 /**
219 * Appends 16-bit characters between iterators |aSrcStart| and
220 * |aSrcEnd| to the writable string |aDest|.
221 *
222 * After this operation |aDest| is not null terminated.
223 *
224 * @param aSrcStart start source iterator
225 * @param aSrcEnd end source iterator
226 * @param aDest destination for the copy
227 */
228 void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
229 const nsAString::const_iterator& aSrcEnd,
230 nsAString& aDest );
232 /**
233 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range 0x00..0x7F (inclusive).
234 *
235 * @param aString a 16-bit wide string to scan
236 */
237 bool IsASCII( const nsAString& aString );
239 /**
240 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range 0x00..0x7F (inclusive).
241 *
242 * @param aString an 8-bit wide string to scan
243 */
244 bool IsASCII( const nsACString& aString );
246 /**
247 * Returns |true| if |aString| is a valid UTF-8 string.
248 * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
249 * It is mainly written to replace, and is roughly equivalent to,
250 *
251 * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
252 *
253 * (see bug 191541)
254 * As such, it does not check for non-UTF-8 7bit encodings such as
255 * ISO-2022-JP and HZ.
256 *
257 * It rejects sequences with the following errors:
258 *
259 * byte sequences that cannot be decoded into characters according to
260 * UTF-8's rules (including cases where the input is part of a valid
261 * UTF-8 sequence but starts or ends mid-character)
262 * overlong sequences (i.e., cases where a character was encoded
263 * non-canonically by using more bytes than necessary)
264 * surrogate codepoints (i.e., the codepoints reserved for
265 * representing astral characters in UTF-16)
266 * codepoints above the unicode range (i.e., outside the first 17
267 * planes; higher than U+10FFFF), in accordance with
268 * http://tools.ietf.org/html/rfc3629
269 * when aRejectNonChar is true (the default), any codepoint whose low
270 * 16 bits are 0xFFFE or 0xFFFF
272 *
273 * @param aString an 8-bit wide string to scan
274 * @param aRejectNonChar a boolean to control the rejection of utf-8
275 * non characters
276 */
277 bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true );
279 bool ParseString(const nsACString& aAstring, char aDelimiter,
280 nsTArray<nsCString>& aArray);
282 /**
283 * Converts case in place in the argument string.
284 */
285 void ToUpperCase( nsACString& );
287 void ToLowerCase( nsACString& );
289 void ToUpperCase( nsCSubstring& );
291 void ToLowerCase( nsCSubstring& );
293 /**
294 * Converts case from string aSource to aDest.
295 */
296 void ToUpperCase( const nsACString& aSource, nsACString& aDest );
298 void ToLowerCase( const nsACString& aSource, nsACString& aDest );
300 /**
301 * Finds the leftmost occurrence of |aPattern|, if any, in the range |aSearchStart|..|aSearchEnd|.
302 *
303 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
304 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
305 *
306 * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
307 * If we need something faster, then we can implement that later.
308 */
310 bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
311 bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
313 /* sometimes we don't care about where the string was, just that we
314 * found it or not */
315 inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() )
316 {
317 nsAString::const_iterator start, end;
318 aSource.BeginReading(start);
319 aSource.EndReading(end);
320 return FindInReadable(aPattern, start, end, compare);
321 }
323 inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() )
324 {
325 nsACString::const_iterator start, end;
326 aSource.BeginReading(start);
327 aSource.EndReading(end);
328 return FindInReadable(aPattern, start, end, compare);
329 }
332 bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& );
334 /**
335 * Finds the rightmost occurrence of |aPattern|
336 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
337 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
338 *
339 */
340 bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
341 bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
343 /**
344 * Finds the leftmost occurrence of |aChar|, if any, in the range
345 * |aSearchStart|..|aSearchEnd|.
346 *
347 * Returns |true| if a match was found, and adjusts |aSearchStart| to
348 * point to the match. If no match was found, returns |false| and
349 * makes |aSearchStart == aSearchEnd|.
350 */
351 bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd );
352 bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd );
354 /**
355 * Finds the number of occurrences of |aChar| in the string |aStr|
356 */
357 uint32_t CountCharInReadable( const nsAString& aStr,
358 char16_t aChar );
359 uint32_t CountCharInReadable( const nsACString& aStr,
360 char aChar );
362 bool
363 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
364 const nsStringComparator& aComparator =
365 nsDefaultStringComparator() );
366 bool
367 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
368 const nsCStringComparator& aComparator =
369 nsDefaultCStringComparator() );
370 bool
371 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
372 const nsStringComparator& aComparator =
373 nsDefaultStringComparator() );
374 bool
375 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
376 const nsCStringComparator& aComparator =
377 nsDefaultCStringComparator() );
379 const nsAFlatString& EmptyString();
380 const nsAFlatCString& EmptyCString();
382 const nsAFlatString& NullString();
383 const nsAFlatCString& NullCString();
385 /**
386 * Compare a UTF-8 string to a UTF-16 string.
387 *
388 * Returns 0 if the strings are equal, -1 if aUTF8String is less
389 * than aUTF16String, and 1 in the reverse case. In case of fatal
390 * error (eg the strings are not valid UTF8 and UTF16 respectively),
391 * this method will return INT32_MIN.
392 */
393 int32_t
394 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
395 const nsASingleFragmentString& aUTF16String);
397 void
398 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
/**
 * Asks |aStr| to take on length |aLen| and reports whether it did.
 *
 * Calls |aStr.SetLength(aLen)| and then compares |aStr.Length()| against
 * |aLen|.
 *
 * NOTE(review): presumably this exists because |SetLength| can fail
 * without reporting it (e.g. on allocation failure) -- confirm.
 *
 * @param aStr the string-like object to resize; must provide
 *             |SetLength| and |Length|
 * @param aLen the requested length
 * @return     |true| if |aStr.Length()| equals |aLen| after the call
 */
template<class T>
inline bool
EnsureStringLength(T& aStr, uint32_t aLen)
{
  aStr.SetLength(aLen);
  const bool hasRequestedLength = (aStr.Length() == aLen);
  return hasRequestedLength;
}
407 #endif // !defined(nsReadableUtils_h___)