|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsReadableUtils.h" |
|
7 |
|
8 #include "nsMemory.h" |
|
9 #include "nsString.h" |
|
10 #include "nsTArray.h" |
|
11 #include "nsUTF8Utils.h" |
|
12 |
|
13 void |
|
14 LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ) |
|
15 { |
|
16 aDest.Truncate(); |
|
17 LossyAppendUTF16toASCII(aSource, aDest); |
|
18 } |
|
19 |
|
20 void |
|
21 CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) |
|
22 { |
|
23 aDest.Truncate(); |
|
24 AppendASCIItoUTF16(aSource, aDest); |
|
25 } |
|
26 |
|
27 void |
|
28 LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ) |
|
29 { |
|
30 aDest.Truncate(); |
|
31 if (aSource) { |
|
32 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); |
|
33 } |
|
34 } |
|
35 |
|
36 void |
|
37 CopyASCIItoUTF16( const char* aSource, nsAString& aDest ) |
|
38 { |
|
39 aDest.Truncate(); |
|
40 if (aSource) { |
|
41 AppendASCIItoUTF16(nsDependentCString(aSource), aDest); |
|
42 } |
|
43 } |
|
44 |
|
45 void |
|
46 CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) |
|
47 { |
|
48 aDest.Truncate(); |
|
49 AppendUTF16toUTF8(aSource, aDest); |
|
50 } |
|
51 |
|
52 void |
|
53 CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) |
|
54 { |
|
55 aDest.Truncate(); |
|
56 AppendUTF8toUTF16(aSource, aDest); |
|
57 } |
|
58 |
|
59 void |
|
60 CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) |
|
61 { |
|
62 aDest.Truncate(); |
|
63 AppendUTF16toUTF8(aSource, aDest); |
|
64 } |
|
65 |
|
66 void |
|
67 CopyUTF8toUTF16( const char* aSource, nsAString& aDest ) |
|
68 { |
|
69 aDest.Truncate(); |
|
70 AppendUTF8toUTF16(aSource, aDest); |
|
71 } |
|
72 |
|
73 void |
|
74 LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ) |
|
75 { |
|
76 uint32_t old_dest_length = aDest.Length(); |
|
77 aDest.SetLength(old_dest_length + aSource.Length()); |
|
78 |
|
79 nsAString::const_iterator fromBegin, fromEnd; |
|
80 |
|
81 nsACString::iterator dest; |
|
82 aDest.BeginWriting(dest); |
|
83 |
|
84 dest.advance(old_dest_length); |
|
85 |
|
86 // right now, this won't work on multi-fragment destinations |
|
87 LossyConvertEncoding16to8 converter(dest.get()); |
|
88 |
|
89 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
|
90 } |
|
91 |
|
92 void |
|
93 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) |
|
94 { |
|
95 if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) { |
|
96 NS_ABORT_OOM(aDest.Length() + aSource.Length()); |
|
97 } |
|
98 } |
|
99 |
|
100 bool |
|
101 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, |
|
102 const mozilla::fallible_t& ) |
|
103 { |
|
104 uint32_t old_dest_length = aDest.Length(); |
|
105 if (!aDest.SetLength(old_dest_length + aSource.Length(), mozilla::fallible_t())) { |
|
106 return false; |
|
107 } |
|
108 |
|
109 nsACString::const_iterator fromBegin, fromEnd; |
|
110 |
|
111 nsAString::iterator dest; |
|
112 aDest.BeginWriting(dest); |
|
113 |
|
114 dest.advance(old_dest_length); |
|
115 |
|
116 // right now, this won't work on multi-fragment destinations |
|
117 LossyConvertEncoding8to16 converter(dest.get()); |
|
118 |
|
119 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
|
120 return true; |
|
121 } |
|
122 |
|
123 void |
|
124 LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ) |
|
125 { |
|
126 if (aSource) { |
|
127 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); |
|
128 } |
|
129 } |
|
130 |
|
131 void |
|
132 AppendASCIItoUTF16( const char* aSource, nsAString& aDest ) |
|
133 { |
|
134 if (aSource) { |
|
135 AppendASCIItoUTF16(nsDependentCString(aSource), aDest); |
|
136 } |
|
137 } |
|
138 |
|
139 void |
|
140 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) |
|
141 { |
|
142 if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) { |
|
143 NS_ABORT_OOM(aDest.Length() + aSource.Length()); |
|
144 } |
|
145 } |
|
146 |
|
147 bool |
|
148 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, |
|
149 const mozilla::fallible_t& ) |
|
150 { |
|
151 nsAString::const_iterator source_start, source_end; |
|
152 CalculateUTF8Size calculator; |
|
153 copy_string(aSource.BeginReading(source_start), |
|
154 aSource.EndReading(source_end), calculator); |
|
155 |
|
156 uint32_t count = calculator.Size(); |
|
157 |
|
158 if (count) |
|
159 { |
|
160 uint32_t old_dest_length = aDest.Length(); |
|
161 |
|
162 // Grow the buffer if we need to. |
|
163 if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { |
|
164 return false; |
|
165 } |
|
166 |
|
167 // All ready? Time to convert |
|
168 |
|
169 ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length); |
|
170 copy_string(aSource.BeginReading(source_start), |
|
171 aSource.EndReading(source_end), converter); |
|
172 |
|
173 NS_ASSERTION(converter.Size() == count, |
|
174 "Unexpected disparity between CalculateUTF8Size and " |
|
175 "ConvertUTF16toUTF8"); |
|
176 } |
|
177 |
|
178 return true; |
|
179 } |
|
180 |
|
181 void |
|
182 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) |
|
183 { |
|
184 if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) { |
|
185 NS_ABORT_OOM(aDest.Length() + aSource.Length()); |
|
186 } |
|
187 } |
|
188 |
|
189 bool |
|
190 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, |
|
191 const mozilla::fallible_t& ) |
|
192 { |
|
193 nsACString::const_iterator source_start, source_end; |
|
194 CalculateUTF8Length calculator; |
|
195 copy_string(aSource.BeginReading(source_start), |
|
196 aSource.EndReading(source_end), calculator); |
|
197 |
|
198 uint32_t count = calculator.Length(); |
|
199 |
|
200 // Avoid making the string mutable if we're appending an empty string |
|
201 if (count) |
|
202 { |
|
203 uint32_t old_dest_length = aDest.Length(); |
|
204 |
|
205 // Grow the buffer if we need to. |
|
206 if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { |
|
207 return false; |
|
208 } |
|
209 |
|
210 // All ready? Time to convert |
|
211 |
|
212 ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); |
|
213 copy_string(aSource.BeginReading(source_start), |
|
214 aSource.EndReading(source_end), converter); |
|
215 |
|
216 NS_ASSERTION(converter.ErrorEncountered() || |
|
217 converter.Length() == count, |
|
218 "CalculateUTF8Length produced the wrong length"); |
|
219 |
|
220 if (converter.ErrorEncountered()) |
|
221 { |
|
222 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); |
|
223 aDest.SetLength(old_dest_length); |
|
224 } |
|
225 } |
|
226 |
|
227 return true; |
|
228 } |
|
229 |
|
230 void |
|
231 AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) |
|
232 { |
|
233 if (aSource) { |
|
234 AppendUTF16toUTF8(nsDependentString(aSource), aDest); |
|
235 } |
|
236 } |
|
237 |
|
238 void |
|
239 AppendUTF8toUTF16( const char* aSource, nsAString& aDest ) |
|
240 { |
|
241 if (aSource) { |
|
242 AppendUTF8toUTF16(nsDependentCString(aSource), aDest); |
|
243 } |
|
244 } |
|
245 |
|
246 |
|
247 /** |
|
248 * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). |
|
249 * |
|
250 * @param aSource an string you will eventually be making a copy of |
|
251 * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|. |
|
252 * |
|
253 */ |
|
254 template <class FromStringT, class ToCharT> |
|
255 inline |
|
256 ToCharT* |
|
257 AllocateStringCopy( const FromStringT& aSource, ToCharT* ) |
|
258 { |
|
259 return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT))); |
|
260 } |
|
261 |
|
262 |
|
263 char* |
|
264 ToNewCString( const nsAString& aSource ) |
|
265 { |
|
266 char* result = AllocateStringCopy(aSource, (char*)0); |
|
267 if (!result) |
|
268 return nullptr; |
|
269 |
|
270 nsAString::const_iterator fromBegin, fromEnd; |
|
271 LossyConvertEncoding16to8 converter(result); |
|
272 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); |
|
273 return result; |
|
274 } |
|
275 |
|
276 char* |
|
277 ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count ) |
|
278 { |
|
279 nsAString::const_iterator start, end; |
|
280 CalculateUTF8Size calculator; |
|
281 copy_string(aSource.BeginReading(start), aSource.EndReading(end), |
|
282 calculator); |
|
283 |
|
284 if (aUTF8Count) |
|
285 *aUTF8Count = calculator.Size(); |
|
286 |
|
287 char *result = static_cast<char*> |
|
288 (nsMemory::Alloc(calculator.Size() + 1)); |
|
289 if (!result) |
|
290 return nullptr; |
|
291 |
|
292 ConvertUTF16toUTF8 converter(result); |
|
293 copy_string(aSource.BeginReading(start), aSource.EndReading(end), |
|
294 converter).write_terminator(); |
|
295 NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); |
|
296 |
|
297 return result; |
|
298 } |
|
299 |
|
300 char* |
|
301 ToNewCString( const nsACString& aSource ) |
|
302 { |
|
303 // no conversion needed, just allocate a buffer of the correct length and copy into it |
|
304 |
|
305 char* result = AllocateStringCopy(aSource, (char*)0); |
|
306 if (!result) |
|
307 return nullptr; |
|
308 |
|
309 nsACString::const_iterator fromBegin, fromEnd; |
|
310 char* toBegin = result; |
|
311 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0); |
|
312 return result; |
|
313 } |
|
314 |
|
315 char16_t* |
|
316 ToNewUnicode( const nsAString& aSource ) |
|
317 { |
|
318 // no conversion needed, just allocate a buffer of the correct length and copy into it |
|
319 |
|
320 char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); |
|
321 if (!result) |
|
322 return nullptr; |
|
323 |
|
324 nsAString::const_iterator fromBegin, fromEnd; |
|
325 char16_t* toBegin = result; |
|
326 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char16_t(0); |
|
327 return result; |
|
328 } |
|
329 |
|
330 char16_t* |
|
331 ToNewUnicode( const nsACString& aSource ) |
|
332 { |
|
333 char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); |
|
334 if (!result) |
|
335 return nullptr; |
|
336 |
|
337 nsACString::const_iterator fromBegin, fromEnd; |
|
338 LossyConvertEncoding8to16 converter(result); |
|
339 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); |
|
340 return result; |
|
341 } |
|
342 |
|
343 uint32_t |
|
344 CalcUTF8ToUnicodeLength( const nsACString& aSource) |
|
345 { |
|
346 nsACString::const_iterator start, end; |
|
347 CalculateUTF8Length calculator; |
|
348 copy_string(aSource.BeginReading(start), aSource.EndReading(end), |
|
349 calculator); |
|
350 return calculator.Length(); |
|
351 } |
|
352 |
|
353 char16_t* |
|
354 UTF8ToUnicodeBuffer( const nsACString& aSource, char16_t* aBuffer, uint32_t *aUTF16Count ) |
|
355 { |
|
356 nsACString::const_iterator start, end; |
|
357 ConvertUTF8toUTF16 converter(aBuffer); |
|
358 copy_string(aSource.BeginReading(start), |
|
359 aSource.EndReading(end), |
|
360 converter).write_terminator(); |
|
361 if (aUTF16Count) |
|
362 *aUTF16Count = converter.Length(); |
|
363 return aBuffer; |
|
364 } |
|
365 |
|
366 char16_t* |
|
367 UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count ) |
|
368 { |
|
369 const uint32_t length = CalcUTF8ToUnicodeLength(aSource); |
|
370 const size_t buffer_size = (length + 1) * sizeof(char16_t); |
|
371 char16_t *buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size)); |
|
372 if (!buffer) |
|
373 return nullptr; |
|
374 |
|
375 uint32_t copied; |
|
376 UTF8ToUnicodeBuffer(aSource, buffer, &copied); |
|
377 NS_ASSERTION(length == copied, "length mismatch"); |
|
378 |
|
379 if (aUTF16Count) |
|
380 *aUTF16Count = copied; |
|
381 return buffer; |
|
382 } |
|
383 |
|
384 char16_t* |
|
385 CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength ) |
|
386 { |
|
387 nsAString::const_iterator fromBegin, fromEnd; |
|
388 char16_t* toBegin = aDest; |
|
389 copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin); |
|
390 return aDest; |
|
391 } |
|
392 |
|
393 void |
|
394 CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, |
|
395 const nsAString::const_iterator& aSrcEnd, |
|
396 nsAString& aDest ) |
|
397 { |
|
398 nsAString::iterator writer; |
|
399 aDest.SetLength(Distance(aSrcStart, aSrcEnd)); |
|
400 |
|
401 aDest.BeginWriting(writer); |
|
402 nsAString::const_iterator fromBegin(aSrcStart); |
|
403 |
|
404 copy_string(fromBegin, aSrcEnd, writer); |
|
405 } |
|
406 |
|
407 void |
|
408 AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, |
|
409 const nsAString::const_iterator& aSrcEnd, |
|
410 nsAString& aDest ) |
|
411 { |
|
412 nsAString::iterator writer; |
|
413 uint32_t oldLength = aDest.Length(); |
|
414 aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); |
|
415 |
|
416 aDest.BeginWriting(writer).advance(oldLength); |
|
417 nsAString::const_iterator fromBegin(aSrcStart); |
|
418 |
|
419 copy_string(fromBegin, aSrcEnd, writer); |
|
420 } |
|
421 |
|
422 bool |
|
423 IsASCII( const nsAString& aString ) |
|
424 { |
|
425 static const char16_t NOT_ASCII = char16_t(~0x007F); |
|
426 |
|
427 |
|
428 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character |
|
429 |
|
430 nsAString::const_iterator iter, done_reading; |
|
431 aString.BeginReading(iter); |
|
432 aString.EndReading(done_reading); |
|
433 |
|
434 const char16_t* c = iter.get(); |
|
435 const char16_t* end = done_reading.get(); |
|
436 |
|
437 while ( c < end ) |
|
438 { |
|
439 if ( *c++ & NOT_ASCII ) |
|
440 return false; |
|
441 } |
|
442 |
|
443 return true; |
|
444 } |
|
445 |
|
446 bool |
|
447 IsASCII( const nsACString& aString ) |
|
448 { |
|
449 static const char NOT_ASCII = char(~0x7F); |
|
450 |
|
451 |
|
452 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character |
|
453 |
|
454 nsACString::const_iterator iter, done_reading; |
|
455 aString.BeginReading(iter); |
|
456 aString.EndReading(done_reading); |
|
457 |
|
458 const char* c = iter.get(); |
|
459 const char* end = done_reading.get(); |
|
460 |
|
461 while ( c < end ) |
|
462 { |
|
463 if ( *c++ & NOT_ASCII ) |
|
464 return false; |
|
465 } |
|
466 |
|
467 return true; |
|
468 } |
|
469 |
|
470 bool |
|
471 IsUTF8( const nsACString& aString, bool aRejectNonChar ) |
|
472 { |
|
473 nsReadingIterator<char> done_reading; |
|
474 aString.EndReading(done_reading); |
|
475 |
|
476 int32_t state = 0; |
|
477 bool overlong = false; |
|
478 bool surrogate = false; |
|
479 bool nonchar = false; |
|
480 uint16_t olupper = 0; // overlong byte upper bound. |
|
481 uint16_t slower = 0; // surrogate byte lower bound. |
|
482 |
|
483 nsReadingIterator<char> iter; |
|
484 aString.BeginReading(iter); |
|
485 |
|
486 const char* ptr = iter.get(); |
|
487 const char* end = done_reading.get(); |
|
488 while ( ptr < end ) |
|
489 { |
|
490 uint8_t c; |
|
491 |
|
492 if (0 == state) |
|
493 { |
|
494 c = *ptr++; |
|
495 |
|
496 if ( UTF8traits::isASCII(c) ) |
|
497 continue; |
|
498 |
|
499 if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong. |
|
500 return false; |
|
501 else if ( UTF8traits::is2byte(c) ) |
|
502 state = 1; |
|
503 else if ( UTF8traits::is3byte(c) ) |
|
504 { |
|
505 state = 2; |
|
506 if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF] |
|
507 { |
|
508 overlong = true; |
|
509 olupper = 0x9F; |
|
510 } |
|
511 else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint |
|
512 { |
|
513 surrogate = true; |
|
514 slower = 0xA0; |
|
515 } |
|
516 else if ( c == 0xEF ) // EF BF [BE-BF] : non-character |
|
517 nonchar = true; |
|
518 } |
|
519 else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090) |
|
520 { |
|
521 state = 3; |
|
522 nonchar = true; |
|
523 if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2} |
|
524 { |
|
525 overlong = true; |
|
526 olupper = 0x8F; |
|
527 } |
|
528 else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF] |
|
529 { |
|
530 // actually not surrogates but codepoints beyond 0x10FFFF |
|
531 surrogate = true; |
|
532 slower = 0x90; |
|
533 } |
|
534 } |
|
535 else |
|
536 return false; // Not UTF-8 string |
|
537 } |
|
538 |
|
539 if (nonchar && !aRejectNonChar) |
|
540 nonchar = false; |
|
541 |
|
542 while ( ptr < end && state ) |
|
543 { |
|
544 c = *ptr++; |
|
545 --state; |
|
546 |
|
547 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF] |
|
548 if ( nonchar && |
|
549 ( ( !state && c < 0xBE ) || |
|
550 ( state == 1 && c != 0xBF ) || |
|
551 ( state == 2 && 0x0F != (0x0F & c) ))) |
|
552 nonchar = false; |
|
553 |
|
554 if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) || |
|
555 ( surrogate && slower <= c ) || ( nonchar && !state )) |
|
556 return false; // Not UTF-8 string |
|
557 |
|
558 overlong = surrogate = false; |
|
559 } |
|
560 } |
|
561 return !state; // state != 0 at the end indicates an invalid UTF-8 seq. |
|
562 } |
|
563 |
|
/**
 * A character sink for in-place case conversion: |write| upper-cases the
 * characters 'a'..'z' directly inside the buffer it is handed and leaves
 * every other character untouched.
 */
class ConvertToUpperCase
{
public:
  typedef char value_type;

  /**
   * Upper-cases |aSourceLength| characters starting at |aSource| in place.
   * The buffer is modified even though the pointer is declared const.
   *
   * @return |aSourceLength|
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* const buffer = const_cast<char*>(aSource);
    for (uint32_t i = 0; i < aSourceLength; ++i) {
      const char ch = buffer[i];
      if (ch >= 'a' && ch <= 'z') {
        buffer[i] = ch - ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
|
586 |
|
587 void |
|
588 ToUpperCase( nsCSubstring& aCString ) |
|
589 { |
|
590 ConvertToUpperCase converter; |
|
591 char* start; |
|
592 converter.write(aCString.BeginWriting(start), aCString.Length()); |
|
593 } |
|
594 |
|
595 /** |
|
596 * A character sink for copying with case conversion. |
|
597 */ |
|
598 class CopyToUpperCase |
|
599 { |
|
600 public: |
|
601 typedef char value_type; |
|
602 |
|
603 CopyToUpperCase( nsACString::iterator& aDestIter ) |
|
604 : mIter(aDestIter) |
|
605 { |
|
606 } |
|
607 |
|
608 uint32_t |
|
609 write( const char* aSource, uint32_t aSourceLength ) |
|
610 { |
|
611 uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); |
|
612 char* cp = mIter.get(); |
|
613 const char* end = aSource + len; |
|
614 while (aSource != end) { |
|
615 char ch = *aSource; |
|
616 if ((ch >= 'a') && (ch <= 'z')) |
|
617 *cp = ch - ('a' - 'A'); |
|
618 else |
|
619 *cp = ch; |
|
620 ++aSource; |
|
621 ++cp; |
|
622 } |
|
623 mIter.advance(len); |
|
624 return len; |
|
625 } |
|
626 |
|
627 protected: |
|
628 nsACString::iterator& mIter; |
|
629 }; |
|
630 |
|
631 void |
|
632 ToUpperCase( const nsACString& aSource, nsACString& aDest ) |
|
633 { |
|
634 nsACString::const_iterator fromBegin, fromEnd; |
|
635 nsACString::iterator toBegin; |
|
636 aDest.SetLength(aSource.Length()); |
|
637 |
|
638 CopyToUpperCase converter(aDest.BeginWriting(toBegin)); |
|
639 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
|
640 } |
|
641 |
|
/**
 * A character sink for in-place case conversion: |write| lower-cases the
 * characters 'A'..'Z' directly inside the buffer it is handed and leaves
 * every other character untouched.
 */
class ConvertToLowerCase
{
public:
  typedef char value_type;

  /**
   * Lower-cases |aSourceLength| characters starting at |aSource| in place.
   * The buffer is modified even though the pointer is declared const.
   *
   * @return |aSourceLength|
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* const buffer = const_cast<char*>(aSource);
    for (uint32_t i = 0; i < aSourceLength; ++i) {
      const char ch = buffer[i];
      if (ch >= 'A' && ch <= 'Z') {
        buffer[i] = ch + ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
|
664 |
|
665 void |
|
666 ToLowerCase( nsCSubstring& aCString ) |
|
667 { |
|
668 ConvertToLowerCase converter; |
|
669 char* start; |
|
670 converter.write(aCString.BeginWriting(start), aCString.Length()); |
|
671 } |
|
672 |
|
673 /** |
|
674 * A character sink for copying with case conversion. |
|
675 */ |
|
676 class CopyToLowerCase |
|
677 { |
|
678 public: |
|
679 typedef char value_type; |
|
680 |
|
681 CopyToLowerCase( nsACString::iterator& aDestIter ) |
|
682 : mIter(aDestIter) |
|
683 { |
|
684 } |
|
685 |
|
686 uint32_t |
|
687 write( const char* aSource, uint32_t aSourceLength ) |
|
688 { |
|
689 uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); |
|
690 char* cp = mIter.get(); |
|
691 const char* end = aSource + len; |
|
692 while (aSource != end) { |
|
693 char ch = *aSource; |
|
694 if ((ch >= 'A') && (ch <= 'Z')) |
|
695 *cp = ch + ('a' - 'A'); |
|
696 else |
|
697 *cp = ch; |
|
698 ++aSource; |
|
699 ++cp; |
|
700 } |
|
701 mIter.advance(len); |
|
702 return len; |
|
703 } |
|
704 |
|
705 protected: |
|
706 nsACString::iterator& mIter; |
|
707 }; |
|
708 |
|
709 void |
|
710 ToLowerCase( const nsACString& aSource, nsACString& aDest ) |
|
711 { |
|
712 nsACString::const_iterator fromBegin, fromEnd; |
|
713 nsACString::iterator toBegin; |
|
714 aDest.SetLength(aSource.Length()); |
|
715 |
|
716 CopyToLowerCase converter(aDest.BeginWriting(toBegin)); |
|
717 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
|
718 } |
|
719 |
|
720 bool |
|
721 ParseString(const nsACString& aSource, char aDelimiter, |
|
722 nsTArray<nsCString>& aArray) |
|
723 { |
|
724 nsACString::const_iterator start, end; |
|
725 aSource.BeginReading(start); |
|
726 aSource.EndReading(end); |
|
727 |
|
728 uint32_t oldLength = aArray.Length(); |
|
729 |
|
730 for (;;) |
|
731 { |
|
732 nsACString::const_iterator delimiter = start; |
|
733 FindCharInReadable(aDelimiter, delimiter, end); |
|
734 |
|
735 if (delimiter != start) |
|
736 { |
|
737 if (!aArray.AppendElement(Substring(start, delimiter))) |
|
738 { |
|
739 aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength); |
|
740 return false; |
|
741 } |
|
742 } |
|
743 |
|
744 if (delimiter == end) |
|
745 break; |
|
746 start = ++delimiter; |
|
747 if (start == end) |
|
748 break; |
|
749 } |
|
750 |
|
751 return true; |
|
752 } |
|
753 |
|
/**
 * Forward search for |aPattern| inside [aSearchStart, aSearchEnd).
 *
 * |compare| is called on single characters and a zero result is treated as
 * "equal". On success both iterators are updated to delimit the match; on
 * failure |aSearchStart| ends up equal to |aSearchEnd|. An empty search
 * range is reported as "not found" without touching the iterators.
 */
template <class StringT, class IteratorT, class Comparator>
bool
FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                    IteratorT& aSearchEnd, const Comparator& compare)
{
  // only bother searching at all if we're given a non-empty range to search
  if (aSearchStart == aSearchEnd) {
    return false;
  }

  IteratorT patternBegin, patternEnd;
  aPattern.BeginReading(patternBegin);
  aPattern.EndReading(patternEnd);

  // Each round: scan for a first-character hit, then verify the remainder.
  for (;;) {
    // Skip ahead to the next position whose character matches the first
    // character of the pattern.
    while (aSearchStart != aSearchEnd &&
           compare(patternBegin.get(), aSearchStart.get(), 1, 1)) {
      ++aSearchStart;
    }

    // Ran out of string without a candidate: no match.
    if (aSearchStart == aSearchEnd) {
      return false;
    }

    // Candidate found; verify the remaining pattern characters here.
    IteratorT patternCursor(patternBegin);
    IteratorT searchCursor(aSearchStart);

    for (;;) {
      // The first character was already compared by the scan above, so
      // advance before the next comparison.
      ++patternCursor;
      ++searchCursor;

      // Reached the end of the pattern: the whole pattern matched.
      if (patternCursor == patternEnd) {
        // return the exact found range through the parameters
        aSearchEnd = searchCursor;
        return true;
      }

      // Reached the end of the searched range before the end of the
      // pattern: the pattern can no longer fit anywhere.
      if (searchCursor == aSearchEnd) {
        aSearchStart = aSearchEnd;
        return false;
      }

      // Mismatch: resume scanning from the next search position.
      if (compare(patternCursor.get(), searchCursor.get(), 1, 1)) {
        ++aSearchStart;
        break;
      }
    }
  }
}
|
820 |
|
/**
 * This searches the entire string from right to left, and returns the first
 * match found, if any.
 *
 * |compare| is called on single characters and a zero result is treated as
 * "equal". On success both iterators are updated to delimit the match; on
 * failure |aSearchStart| is set to |aSearchEnd|.
 *
 * NOTE(review): the pattern's end iterator is decremented unconditionally,
 * so this appears to assume a non-empty pattern -- confirm against callers.
 */
template <class StringT, class IteratorT, class Comparator>
bool
RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                     IteratorT& aSearchEnd, const Comparator& compare)
{
  IteratorT patternFirst, patternLast, candidateEnd = aSearchEnd;
  aPattern.BeginReading(patternFirst);
  aPattern.EndReading(patternLast);

  // Point to the last character in the pattern
  --patternLast;

  // Walk candidate end positions from right to left.
  while (aSearchStart != candidateEnd) {
    // Point to the end position of the next possible match
    --candidateEnd;

    // Only explore further when the last pattern character matches here.
    if (compare(patternLast.get(), candidateEnd.get(), 1, 1) != 0) {
      continue;
    }

    // Potential match: verify it backwards, character by character.
    IteratorT patternCursor(patternLast);
    IteratorT searchCursor(candidateEnd);

    for (;;) {
      // Verified all the way to the start of the pattern: found it.
      if (patternCursor == patternFirst) {
        aSearchStart = searchCursor; // point to start of match
        aSearchEnd = ++candidateEnd; // point to end of match
        return true;
      }

      // Hit the start of the searched range before the start of the
      // pattern: the pattern cannot fit any further to the left either.
      if (searchCursor == aSearchStart) {
        aSearchStart = aSearchEnd;
        return false;
      }

      // test previous character for a match
      --patternCursor;
      --searchCursor;
      if (compare(patternCursor.get(), searchCursor.get(), 1, 1) != 0) {
        break;
      }
    }
  }

  aSearchStart = aSearchEnd;
  return false;
}
|
877 |
|
878 bool |
|
879 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator ) |
|
880 { |
|
881 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
|
882 } |
|
883 |
|
884 bool |
|
885 FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) |
|
886 { |
|
887 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
|
888 } |
|
889 |
|
890 bool |
|
891 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd ) |
|
892 { |
|
893 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator()); |
|
894 } |
|
895 |
|
896 bool |
|
897 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator) |
|
898 { |
|
899 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
|
900 } |
|
901 |
|
902 bool |
|
903 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) |
|
904 { |
|
905 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
|
906 } |
|
907 |
|
908 bool |
|
909 FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ) |
|
910 { |
|
911 int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); |
|
912 |
|
913 const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); |
|
914 if ( charFoundAt ) { |
|
915 aSearchStart.advance( charFoundAt - aSearchStart.get() ); |
|
916 return true; |
|
917 } |
|
918 |
|
919 aSearchStart.advance(fragmentLength); |
|
920 return false; |
|
921 } |
|
922 |
|
923 bool |
|
924 FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ) |
|
925 { |
|
926 int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); |
|
927 |
|
928 const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); |
|
929 if ( charFoundAt ) { |
|
930 aSearchStart.advance( charFoundAt - aSearchStart.get() ); |
|
931 return true; |
|
932 } |
|
933 |
|
934 aSearchStart.advance(fragmentLength); |
|
935 return false; |
|
936 } |
|
937 |
|
938 uint32_t |
|
939 CountCharInReadable( const nsAString& aStr, |
|
940 char16_t aChar ) |
|
941 { |
|
942 uint32_t count = 0; |
|
943 nsAString::const_iterator begin, end; |
|
944 |
|
945 aStr.BeginReading(begin); |
|
946 aStr.EndReading(end); |
|
947 |
|
948 while (begin != end) { |
|
949 if (*begin == aChar) { |
|
950 ++count; |
|
951 } |
|
952 ++begin; |
|
953 } |
|
954 |
|
955 return count; |
|
956 } |
|
957 |
|
958 uint32_t |
|
959 CountCharInReadable( const nsACString& aStr, |
|
960 char aChar ) |
|
961 { |
|
962 uint32_t count = 0; |
|
963 nsACString::const_iterator begin, end; |
|
964 |
|
965 aStr.BeginReading(begin); |
|
966 aStr.EndReading(end); |
|
967 |
|
968 while (begin != end) { |
|
969 if (*begin == aChar) { |
|
970 ++count; |
|
971 } |
|
972 ++begin; |
|
973 } |
|
974 |
|
975 return count; |
|
976 } |
|
977 |
|
978 bool |
|
979 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, |
|
980 const nsStringComparator& aComparator ) |
|
981 { |
|
982 nsAString::size_type src_len = aSource.Length(), |
|
983 sub_len = aSubstring.Length(); |
|
984 if (sub_len > src_len) |
|
985 return false; |
|
986 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); |
|
987 } |
|
988 |
|
989 bool |
|
990 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, |
|
991 const nsCStringComparator& aComparator ) |
|
992 { |
|
993 nsACString::size_type src_len = aSource.Length(), |
|
994 sub_len = aSubstring.Length(); |
|
995 if (sub_len > src_len) |
|
996 return false; |
|
997 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); |
|
998 } |
|
999 |
|
1000 bool |
|
1001 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, |
|
1002 const nsStringComparator& aComparator ) |
|
1003 { |
|
1004 nsAString::size_type src_len = aSource.Length(), |
|
1005 sub_len = aSubstring.Length(); |
|
1006 if (sub_len > src_len) |
|
1007 return false; |
|
1008 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, |
|
1009 aComparator); |
|
1010 } |
|
1011 |
|
1012 bool |
|
1013 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, |
|
1014 const nsCStringComparator& aComparator ) |
|
1015 { |
|
1016 nsACString::size_type src_len = aSource.Length(), |
|
1017 sub_len = aSubstring.Length(); |
|
1018 if (sub_len > src_len) |
|
1019 return false; |
|
1020 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, |
|
1021 aComparator); |
|
1022 } |
|
1023 |
|
1024 |
|
1025 |
|
1026 static const char16_t empty_buffer[1] = { '\0' }; |
|
1027 |
|
1028 const nsAFlatString& |
|
1029 EmptyString() |
|
1030 { |
|
1031 static const nsDependentString sEmpty(empty_buffer); |
|
1032 |
|
1033 return sEmpty; |
|
1034 } |
|
1035 |
|
1036 const nsAFlatCString& |
|
1037 EmptyCString() |
|
1038 { |
|
1039 static const nsDependentCString sEmpty((const char *)empty_buffer); |
|
1040 |
|
1041 return sEmpty; |
|
1042 } |
|
1043 |
|
1044 const nsAFlatString& |
|
1045 NullString() |
|
1046 { |
|
1047 static const nsXPIDLString sNull; |
|
1048 |
|
1049 return sNull; |
|
1050 } |
|
1051 |
|
1052 const nsAFlatCString& |
|
1053 NullCString() |
|
1054 { |
|
1055 static const nsXPIDLCString sNull; |
|
1056 |
|
1057 return sNull; |
|
1058 } |
|
1059 |
|
1060 int32_t |
|
1061 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, |
|
1062 const nsASingleFragmentString& aUTF16String) |
|
1063 { |
|
1064 static const uint32_t NOT_ASCII = uint32_t(~0x7F); |
|
1065 |
|
1066 const char *u8, *u8end; |
|
1067 aUTF8String.BeginReading(u8); |
|
1068 aUTF8String.EndReading(u8end); |
|
1069 |
|
1070 const char16_t *u16, *u16end; |
|
1071 aUTF16String.BeginReading(u16); |
|
1072 aUTF16String.EndReading(u16end); |
|
1073 |
|
1074 while (u8 != u8end && u16 != u16end) |
|
1075 { |
|
1076 // Cast away the signedness of *u8 to prevent signextension when |
|
1077 // converting to uint32_t |
|
1078 uint32_t c8_32 = (uint8_t)*u8; |
|
1079 |
|
1080 if (c8_32 & NOT_ASCII) |
|
1081 { |
|
1082 bool err; |
|
1083 c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); |
|
1084 if (err) |
|
1085 return INT32_MIN; |
|
1086 |
|
1087 uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); |
|
1088 // The above UTF16CharEnumerator::NextChar() calls can |
|
1089 // fail, but if it does for anything other than no data to |
|
1090 // look at (which can't happen here), it returns the |
|
1091 // Unicode replacement character 0xFFFD for the invalid |
|
1092 // data they were fed. Ignore that error and treat invalid |
|
1093 // UTF16 as 0xFFFD. |
|
1094 // |
|
1095 // This matches what our UTF16 to UTF8 conversion code |
|
1096 // does, and thus a UTF8 string that came from an invalid |
|
1097 // UTF16 string will compare equal to the invalid UTF16 |
|
1098 // string it came from. Same is true for any other UTF16 |
|
1099 // string differs only in the invalid part of the string. |
|
1100 |
|
1101 if (c8_32 != c16_32) |
|
1102 return c8_32 < c16_32 ? -1 : 1; |
|
1103 } |
|
1104 else |
|
1105 { |
|
1106 if (c8_32 != *u16) |
|
1107 return c8_32 > *u16 ? 1 : -1; |
|
1108 |
|
1109 ++u8; |
|
1110 ++u16; |
|
1111 } |
|
1112 } |
|
1113 |
|
1114 if (u8 != u8end) |
|
1115 { |
|
1116 // We get to the end of the UTF16 string, but no to the end of |
|
1117 // the UTF8 string. The UTF8 string is longer than the UTF16 |
|
1118 // string |
|
1119 |
|
1120 return 1; |
|
1121 } |
|
1122 |
|
1123 if (u16 != u16end) |
|
1124 { |
|
1125 // We get to the end of the UTF8 string, but no to the end of |
|
1126 // the UTF16 string. The UTF16 string is longer than the UTF8 |
|
1127 // string |
|
1128 |
|
1129 return -1; |
|
1130 } |
|
1131 |
|
1132 // The two strings match. |
|
1133 |
|
1134 return 0; |
|
1135 } |
|
1136 |
|
1137 void |
|
1138 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) |
|
1139 { |
|
1140 NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); |
|
1141 if (IS_IN_BMP(aSource)) |
|
1142 { |
|
1143 aDest.Append(char16_t(aSource)); |
|
1144 } |
|
1145 else |
|
1146 { |
|
1147 aDest.Append(H_SURROGATE(aSource)); |
|
1148 aDest.Append(L_SURROGATE(aSource)); |
|
1149 } |
|
1150 } |