|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 // IWYU pragma: private, include "nsString.h" |
|
6 |
|
7 #ifndef nsReadableUtils_h___ |
|
8 #define nsReadableUtils_h___ |
|
9 |
|
10 /** |
|
 * I guess the routines in this file are all mis-named.
|
12 * According to our conventions, they should be |NS_xxx|. |
|
13 */ |
|
14 |
|
15 #include "nsAString.h" |
|
16 |
|
17 #include "nsTArrayForwardDeclare.h" |
|
18 |
|
19 inline size_t Distance( const nsReadingIterator<char16_t>& start, const nsReadingIterator<char16_t>& end ) |
|
20 { |
|
21 return end.get() - start.get(); |
|
22 } |
|
23 inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end ) |
|
24 { |
|
25 return end.get() - start.get(); |
|
26 } |
|
27 |
|
28 void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ); |
|
29 void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); |
|
30 |
|
31 void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ); |
|
32 void CopyASCIItoUTF16( const char* aSource, nsAString& aDest ); |
|
33 |
|
34 void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); |
|
35 void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); |
|
36 |
|
37 void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); |
|
38 void CopyUTF8toUTF16( const char* aSource, nsAString& aDest ); |
|
39 |
|
40 void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ); |
|
41 void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); |
|
42 bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, |
|
43 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; |
|
44 |
|
45 void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ); |
|
46 void AppendASCIItoUTF16( const char* aSource, nsAString& aDest ); |
|
47 |
|
48 void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); |
|
49 bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, |
|
50 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; |
|
51 void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); |
|
52 bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, |
|
53 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; |
|
54 |
|
55 void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); |
|
56 void AppendUTF8toUTF16( const char* aSource, nsAString& aDest ); |
|
57 |
|
58 #ifdef MOZ_USE_CHAR16_WRAPPER |
|
59 inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest ) |
|
60 { |
|
61 return AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest); |
|
62 } |
|
63 #endif |
|
64 |
|
65 /** |
|
66 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. |
|
67 * |
|
68 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. |
|
69 * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer. |
|
70 * This conversion is not well defined; but it reproduces legacy string behavior. |
|
71 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. |
|
72 * |
|
73 * @param aSource a 16-bit wide string |
|
74 * @return a new |char| buffer you must free with |nsMemory::Free|. |
|
75 */ |
|
76 char* ToNewCString( const nsAString& aSource ); |
|
77 |
|
78 |
|
79 /** |
|
80 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. |
|
81 * |
|
82 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. |
|
83 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. |
|
84 * |
|
85 * @param aSource an 8-bit wide string |
|
86 * @return a new |char| buffer you must free with |nsMemory::Free|. |
|
87 */ |
|
88 char* ToNewCString( const nsACString& aSource ); |
|
89 |
|
90 /** |
|
91 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. |
|
92 * |
|
93 * Allocates and returns a new |char| buffer which you must free with |
|
94 * |nsMemory::Free|. |
|
95 * Performs an encoding conversion from a UTF-16 string to a UTF-8 string |
|
96 * copying |aSource| to your new buffer. |
|
97 * The new buffer is zero-terminated, but that may not help you if |aSource| |
|
98 * contains embedded nulls. |
|
99 * |
|
100 * @param aSource a UTF-16 string (made of char16_t's) |
|
101 * @param aUTF8Count the number of 8-bit units that was returned |
|
102 * @return a new |char| buffer you must free with |nsMemory::Free|. |
|
103 */ |
|
104 |
|
105 char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr ); |
|
106 |
|
107 |
|
108 /** |
|
109 * Returns a new |char16_t| buffer containing a zero-terminated copy of |
|
110 * |aSource|. |
|
111 * |
|
112 * Allocates and returns a new |char16_t| buffer which you must free with |
|
113 * |nsMemory::Free|. |
|
114 * The new buffer is zero-terminated, but that may not help you if |aSource| |
|
115 * contains embedded nulls. |
|
116 * |
|
117 * @param aSource a UTF-16 string |
|
118 * @return a new |char16_t| buffer you must free with |nsMemory::Free|. |
|
119 */ |
|
120 char16_t* ToNewUnicode( const nsAString& aSource ); |
|
121 |
|
122 |
|
123 /** |
|
124 * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|. |
|
125 * |
|
126 * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|. |
|
127 * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer. |
|
128 * This conversion is not well defined; but it reproduces legacy string behavior. |
|
129 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. |
|
130 * |
|
131 * @param aSource an 8-bit wide string (a C-string, NOT UTF-8) |
|
132 * @return a new |char16_t| buffer you must free with |nsMemory::Free|. |
|
133 */ |
|
134 char16_t* ToNewUnicode( const nsACString& aSource ); |
|
135 |
|
136 /** |
|
137 * Returns the required length for a char16_t buffer holding |
|
138 * a copy of aSource, using UTF-8 to UTF-16 conversion. |
|
139 * The length does NOT include any space for zero-termination. |
|
140 * |
|
141 * @param aSource an 8-bit wide string, UTF-8 encoded |
|
142 * @return length of UTF-16 encoded string copy, not zero-terminated |
|
143 */ |
|
144 uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource ); |
|
145 |
|
146 /** |
|
147 * Copies the source string into the specified buffer, converting UTF-8 to |
|
148 * UTF-16 in the process. The conversion is well defined for valid UTF-8 |
|
149 * strings. |
|
150 * The copied string will be zero-terminated! Any embedded nulls will be |
|
 * copied nonetheless. It is the caller's responsibility to ensure the buffer
|
152 * is large enough to hold the string copy plus one char16_t for |
|
153 * zero-termination! |
|
154 * |
|
155 * @see CalcUTF8ToUnicodeLength( const nsACString& ) |
|
156 * @see UTF8ToNewUnicode( const nsACString&, uint32_t* ) |
|
157 * |
|
158 * @param aSource an 8-bit wide string, UTF-8 encoded |
|
159 * @param aBuffer the buffer holding the converted string copy |
|
160 * @param aUTF16Count receiving optionally the number of 16-bit units that |
|
161 * were copied |
|
162 * @return aBuffer pointer, for convenience |
|
163 */ |
|
164 char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource, |
|
165 char16_t *aBuffer, |
|
166 uint32_t *aUTF16Count = nullptr ); |
|
167 |
|
168 /** |
|
169 * Returns a new |char16_t| buffer containing a zero-terminated copy |
|
170 * of |aSource|. |
|
171 * |
|
 * Allocates and returns a new |char16_t| buffer which you must free with
|
173 * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16 |
|
174 * while copying |aSource| to your new buffer. This conversion is well defined |
|
175 * for a valid UTF-8 string. The new buffer is zero-terminated, but that |
|
176 * may not help you if |aSource| contains embedded nulls. |
|
177 * |
|
178 * @param aSource an 8-bit wide string, UTF-8 encoded |
|
179 * @param aUTF16Count the number of 16-bit units that was returned |
|
180 * @return a new |char16_t| buffer you must free with |nsMemory::Free|. |
|
181 * (UTF-16 encoded) |
|
182 */ |
|
183 char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr ); |
|
184 |
|
185 /** |
|
186 * Copies |aLength| 16-bit code units from the start of |aSource| to the |
|
187 * |char16_t| buffer |aDest|. |
|
188 * |
|
189 * After this operation |aDest| is not null terminated. |
|
190 * |
|
191 * @param aSource a UTF-16 string |
|
192 * @param aSrcOffset start offset in the source string |
|
193 * @param aDest a |char16_t| buffer |
|
194 * @param aLength the number of 16-bit code units to copy |
|
195 * @return pointer to destination buffer - identical to |aDest| |
|
196 */ |
|
197 char16_t* CopyUnicodeTo( const nsAString& aSource, |
|
198 uint32_t aSrcOffset, |
|
199 char16_t* aDest, |
|
200 uint32_t aLength ); |
|
201 |
|
202 |
|
203 /** |
|
204 * Copies 16-bit characters between iterators |aSrcStart| and |
|
205 * |aSrcEnd| to the writable string |aDest|. Similar to the |
|
206 * |nsString::Mid| method. |
|
207 * |
|
208 * After this operation |aDest| is not null terminated. |
|
209 * |
|
210 * @param aSrcStart start source iterator |
|
211 * @param aSrcEnd end source iterator |
|
212 * @param aDest destination for the copy |
|
213 */ |
|
214 void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, |
|
215 const nsAString::const_iterator& aSrcEnd, |
|
216 nsAString& aDest ); |
|
217 |
|
218 /** |
|
219 * Appends 16-bit characters between iterators |aSrcStart| and |
|
220 * |aSrcEnd| to the writable string |aDest|. |
|
221 * |
|
222 * After this operation |aDest| is not null terminated. |
|
223 * |
|
224 * @param aSrcStart start source iterator |
|
225 * @param aSrcEnd end source iterator |
|
226 * @param aDest destination for the copy |
|
227 */ |
|
228 void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, |
|
229 const nsAString::const_iterator& aSrcEnd, |
|
230 nsAString& aDest ); |
|
231 |
|
232 /** |
|
233 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). |
|
234 * |
|
235 * @param aString a 16-bit wide string to scan |
|
236 */ |
|
237 bool IsASCII( const nsAString& aString ); |
|
238 |
|
239 /** |
|
240 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). |
|
241 * |
|
 * @param aString an 8-bit wide string to scan
|
243 */ |
|
244 bool IsASCII( const nsACString& aString ); |
|
245 |
|
246 /** |
|
247 * Returns |true| if |aString| is a valid UTF-8 string. |
|
 * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
 * It is mainly written to replace, and is roughly equivalent to,
|
250 * |
|
251 * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str))) |
|
252 * |
|
253 * (see bug 191541) |
|
254 * As such, it does not check for non-UTF-8 7bit encodings such as |
|
255 * ISO-2022-JP and HZ. |
|
256 * |
|
 * It rejects sequences with the following errors:
 *
 * - byte sequences that cannot be decoded into characters according to
 *   UTF-8's rules (including cases where the input is part of a valid
 *   UTF-8 sequence but starts or ends mid-character)
 * - overlong sequences (i.e., cases where a character was encoded
 *   non-canonically by using more bytes than necessary)
 * - surrogate codepoints (i.e., the codepoints reserved for
 *   representing astral characters in UTF-16)
 * - codepoints above the unicode range (i.e., outside the first 17
 *   planes; higher than U+10FFFF), in accordance with
 *   http://tools.ietf.org/html/rfc3629
 * - when aRejectNonChar is true (the default), any codepoint whose low
 *   16 bits are 0xFFFE or 0xFFFF
 *
|
273 * @param aString an 8-bit wide string to scan |
|
274 * @param aRejectNonChar a boolean to control the rejection of utf-8 |
|
275 * non characters |
|
276 */ |
|
277 bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true ); |
|
278 |
|
279 bool ParseString(const nsACString& aAstring, char aDelimiter, |
|
280 nsTArray<nsCString>& aArray); |
|
281 |
|
282 /** |
|
283 * Converts case in place in the argument string. |
|
284 */ |
|
285 void ToUpperCase( nsACString& ); |
|
286 |
|
287 void ToLowerCase( nsACString& ); |
|
288 |
|
289 void ToUpperCase( nsCSubstring& ); |
|
290 |
|
291 void ToLowerCase( nsCSubstring& ); |
|
292 |
|
293 /** |
|
294 * Converts case from string aSource to aDest. |
|
295 */ |
|
296 void ToUpperCase( const nsACString& aSource, nsACString& aDest ); |
|
297 |
|
298 void ToLowerCase( const nsACString& aSource, nsACString& aDest ); |
|
299 |
|
300 /** |
|
301 * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|. |
|
302 * |
|
303 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to |
|
304 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. |
|
305 * |
|
306 * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|. |
|
307 * If we need something faster, then we can implement that later. |
|
308 */ |
|
309 |
|
310 bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() ); |
|
311 bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() ); |
|
312 |
|
313 /* sometimes we don't care about where the string was, just that we |
|
314 * found it or not */ |
|
315 inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() ) |
|
316 { |
|
317 nsAString::const_iterator start, end; |
|
318 aSource.BeginReading(start); |
|
319 aSource.EndReading(end); |
|
320 return FindInReadable(aPattern, start, end, compare); |
|
321 } |
|
322 |
|
323 inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() ) |
|
324 { |
|
325 nsACString::const_iterator start, end; |
|
326 aSource.BeginReading(start); |
|
327 aSource.EndReading(end); |
|
328 return FindInReadable(aPattern, start, end, compare); |
|
329 } |
|
330 |
|
331 |
|
332 bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& ); |
|
333 |
|
334 /** |
|
335 * Finds the rightmost occurrence of |aPattern| |
|
336 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to |
|
337 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. |
|
338 * |
|
339 */ |
|
340 bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() ); |
|
341 bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() ); |
|
342 |
|
343 /** |
|
344 * Finds the leftmost occurrence of |aChar|, if any in the range |
|
345 * |aSearchStart|..|aSearchEnd|. |
|
346 * |
|
347 * Returns |true| if a match was found, and adjusts |aSearchStart| to |
|
348 * point to the match. If no match was found, returns |false| and |
|
349 * makes |aSearchStart == aSearchEnd|. |
|
350 */ |
|
351 bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ); |
|
352 bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ); |
|
353 |
|
354 /** |
|
 * Finds the number of occurrences of |aChar| in the string |aStr|.
|
356 */ |
|
357 uint32_t CountCharInReadable( const nsAString& aStr, |
|
358 char16_t aChar ); |
|
359 uint32_t CountCharInReadable( const nsACString& aStr, |
|
360 char aChar ); |
|
361 |
|
362 bool |
|
363 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, |
|
364 const nsStringComparator& aComparator = |
|
365 nsDefaultStringComparator() ); |
|
366 bool |
|
367 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, |
|
368 const nsCStringComparator& aComparator = |
|
369 nsDefaultCStringComparator() ); |
|
370 bool |
|
371 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, |
|
372 const nsStringComparator& aComparator = |
|
373 nsDefaultStringComparator() ); |
|
374 bool |
|
375 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, |
|
376 const nsCStringComparator& aComparator = |
|
377 nsDefaultCStringComparator() ); |
|
378 |
|
379 const nsAFlatString& EmptyString(); |
|
380 const nsAFlatCString& EmptyCString(); |
|
381 |
|
382 const nsAFlatString& NullString(); |
|
383 const nsAFlatCString& NullCString(); |
|
384 |
|
385 /** |
|
 * Compare a UTF-8 string to a UTF-16 string.
|
387 * |
|
 * Returns 0 if the strings are equal, -1 if aUTF8String is less
 * than aUTF16String, and 1 in the reverse case. In case of fatal
|
390 * error (eg the strings are not valid UTF8 and UTF16 respectively), |
|
391 * this method will return INT32_MIN. |
|
392 */ |
|
393 int32_t |
|
394 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, |
|
395 const nsASingleFragmentString& aUTF16String); |
|
396 |
|
397 void |
|
398 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); |
|
399 |
|
/**
 * Asks |aStr| to resize itself to |aLen| via |SetLength|, then re-reads
 * |Length()| to report whether the string ended up at the requested size.
 *
 * @param aStr any object providing |SetLength(uint32_t)| and |Length()|
 * @param aLen the requested length
 * @return |true| if |aStr.Length()| equals |aLen| after the call to
 *         |SetLength|, |false| otherwise
 */
template<class T>
inline bool
EnsureStringLength(T& aStr, uint32_t aLen)
{
  aStr.SetLength(aLen);
  const bool lengthMatches = (aStr.Length() == aLen);
  return lengthMatches;
}
|
406 |
|
407 #endif // !defined(nsReadableUtils_h___) |