1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/string/src/nsReadableUtils.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1150 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsReadableUtils.h" 1.10 + 1.11 +#include "nsMemory.h" 1.12 +#include "nsString.h" 1.13 +#include "nsTArray.h" 1.14 +#include "nsUTF8Utils.h" 1.15 + 1.16 +void 1.17 +LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ) 1.18 + { 1.19 + aDest.Truncate(); 1.20 + LossyAppendUTF16toASCII(aSource, aDest); 1.21 + } 1.22 + 1.23 +void 1.24 +CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) 1.25 + { 1.26 + aDest.Truncate(); 1.27 + AppendASCIItoUTF16(aSource, aDest); 1.28 + } 1.29 + 1.30 +void 1.31 +LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ) 1.32 + { 1.33 + aDest.Truncate(); 1.34 + if (aSource) { 1.35 + LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); 1.36 + } 1.37 + } 1.38 + 1.39 +void 1.40 +CopyASCIItoUTF16( const char* aSource, nsAString& aDest ) 1.41 + { 1.42 + aDest.Truncate(); 1.43 + if (aSource) { 1.44 + AppendASCIItoUTF16(nsDependentCString(aSource), aDest); 1.45 + } 1.46 + } 1.47 + 1.48 +void 1.49 +CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) 1.50 + { 1.51 + aDest.Truncate(); 1.52 + AppendUTF16toUTF8(aSource, aDest); 1.53 + } 1.54 + 1.55 +void 1.56 +CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) 1.57 + { 1.58 + aDest.Truncate(); 1.59 + AppendUTF8toUTF16(aSource, aDest); 1.60 + } 1.61 + 1.62 +void 1.63 +CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) 1.64 + { 1.65 + aDest.Truncate(); 1.66 + AppendUTF16toUTF8(aSource, aDest); 1.67 + } 1.68 + 1.69 +void 1.70 +CopyUTF8toUTF16( const char* aSource, nsAString& aDest ) 1.71 + { 1.72 + aDest.Truncate(); 1.73 + AppendUTF8toUTF16(aSource, aDest); 1.74 + } 1.75 + 1.76 +void 1.77 +LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ) 1.78 + { 1.79 + uint32_t old_dest_length = aDest.Length(); 1.80 + aDest.SetLength(old_dest_length + aSource.Length()); 1.81 + 1.82 + nsAString::const_iterator fromBegin, fromEnd; 1.83 + 1.84 + nsACString::iterator dest; 1.85 + aDest.BeginWriting(dest); 1.86 + 1.87 + dest.advance(old_dest_length); 1.88 + 1.89 + // right now, this won't work on multi-fragment destinations 1.90 + LossyConvertEncoding16to8 converter(dest.get()); 1.91 + 1.92 + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); 1.93 + } 1.94 + 1.95 +void 1.96 +AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) 1.97 + { 1.98 + if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) { 1.99 + NS_ABORT_OOM(aDest.Length() + aSource.Length()); 1.100 + } 1.101 + } 1.102 + 1.103 +bool 1.104 +AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, 1.105 + const mozilla::fallible_t& ) 1.106 + { 1.107 + uint32_t old_dest_length = aDest.Length(); 1.108 + if (!aDest.SetLength(old_dest_length + aSource.Length(), mozilla::fallible_t())) { 1.109 + return false; 1.110 + } 1.111 + 1.112 + nsACString::const_iterator fromBegin, fromEnd; 1.113 + 1.114 + nsAString::iterator dest; 1.115 + aDest.BeginWriting(dest); 1.116 + 1.117 + dest.advance(old_dest_length); 1.118 + 1.119 + // right now, this won't work on multi-fragment destinations 1.120 + LossyConvertEncoding8to16 converter(dest.get()); 1.121 + 1.122 + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); 1.123 + return true; 1.124 + } 1.125 + 1.126 +void 1.127 +LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ) 1.128 + { 1.129 + if (aSource) { 1.130 + LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); 1.131 + } 1.132 + } 1.133 + 1.134 +void 1.135 +AppendASCIItoUTF16( const char* aSource, nsAString& aDest ) 1.136 + { 1.137 + if (aSource) { 1.138 + AppendASCIItoUTF16(nsDependentCString(aSource), aDest); 1.139 + } 1.140 + } 1.141 + 1.142 +void 1.143 +AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) 1.144 +{ 1.145 + if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) { 1.146 + NS_ABORT_OOM(aDest.Length() + aSource.Length()); 1.147 + } 1.148 +} 1.149 + 1.150 +bool 1.151 +AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, 1.152 + const mozilla::fallible_t& ) 1.153 + { 1.154 + nsAString::const_iterator source_start, source_end; 1.155 + CalculateUTF8Size calculator; 1.156 + copy_string(aSource.BeginReading(source_start), 1.157 + aSource.EndReading(source_end), calculator); 1.158 + 1.159 + uint32_t count = calculator.Size(); 1.160 + 1.161 + if (count) 1.162 + { 1.163 + uint32_t old_dest_length = aDest.Length(); 1.164 + 1.165 + // Grow the buffer if we need to. 1.166 + if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { 1.167 + return false; 1.168 + } 1.169 + 1.170 + // All ready? Time to convert 1.171 + 1.172 + ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length); 1.173 + copy_string(aSource.BeginReading(source_start), 1.174 + aSource.EndReading(source_end), converter); 1.175 + 1.176 + NS_ASSERTION(converter.Size() == count, 1.177 + "Unexpected disparity between CalculateUTF8Size and " 1.178 + "ConvertUTF16toUTF8"); 1.179 + } 1.180 + 1.181 + return true; 1.182 + } 1.183 + 1.184 +void 1.185 +AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) 1.186 +{ 1.187 + if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) { 1.188 + NS_ABORT_OOM(aDest.Length() + aSource.Length()); 1.189 + } 1.190 +} 1.191 + 1.192 +bool 1.193 +AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, 1.194 + const mozilla::fallible_t& ) 1.195 + { 1.196 + nsACString::const_iterator source_start, source_end; 1.197 + CalculateUTF8Length calculator; 1.198 + copy_string(aSource.BeginReading(source_start), 1.199 + aSource.EndReading(source_end), calculator); 1.200 + 1.201 + uint32_t count = calculator.Length(); 1.202 + 1.203 + // Avoid making the string mutable if we're appending an empty string 1.204 + if (count) 1.205 + { 1.206 + uint32_t old_dest_length = aDest.Length(); 1.207 + 1.208 + // Grow the buffer if we need to. 1.209 + if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { 1.210 + return false; 1.211 + } 1.212 + 1.213 + // All ready? Time to convert 1.214 + 1.215 + ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); 1.216 + copy_string(aSource.BeginReading(source_start), 1.217 + aSource.EndReading(source_end), converter); 1.218 + 1.219 + NS_ASSERTION(converter.ErrorEncountered() || 1.220 + converter.Length() == count, 1.221 + "CalculateUTF8Length produced the wrong length"); 1.222 + 1.223 + if (converter.ErrorEncountered()) 1.224 + { 1.225 + NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); 1.226 + aDest.SetLength(old_dest_length); 1.227 + } 1.228 + } 1.229 + 1.230 + return true; 1.231 + } 1.232 + 1.233 +void 1.234 +AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) 1.235 + { 1.236 + if (aSource) { 1.237 + AppendUTF16toUTF8(nsDependentString(aSource), aDest); 1.238 + } 1.239 + } 1.240 + 1.241 +void 1.242 +AppendUTF8toUTF16( const char* aSource, nsAString& aDest ) 1.243 + { 1.244 + if (aSource) { 1.245 + AppendUTF8toUTF16(nsDependentCString(aSource), aDest); 1.246 + } 1.247 + } 1.248 + 1.249 + 1.250 + /** 1.251 + * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). 1.252 + * 1.253 + * @param aSource an string you will eventually be making a copy of 1.254 + * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|. 1.255 + * 1.256 + */ 1.257 +template <class FromStringT, class ToCharT> 1.258 +inline 1.259 +ToCharT* 1.260 +AllocateStringCopy( const FromStringT& aSource, ToCharT* ) 1.261 + { 1.262 + return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT))); 1.263 + } 1.264 + 1.265 + 1.266 +char* 1.267 +ToNewCString( const nsAString& aSource ) 1.268 + { 1.269 + char* result = AllocateStringCopy(aSource, (char*)0); 1.270 + if (!result) 1.271 + return nullptr; 1.272 + 1.273 + nsAString::const_iterator fromBegin, fromEnd; 1.274 + LossyConvertEncoding16to8 converter(result); 1.275 + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); 1.276 + return result; 1.277 + } 1.278 + 1.279 +char* 1.280 +ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count ) 1.281 + { 1.282 + nsAString::const_iterator start, end; 1.283 + CalculateUTF8Size calculator; 1.284 + copy_string(aSource.BeginReading(start), aSource.EndReading(end), 1.285 + calculator); 1.286 + 1.287 + if (aUTF8Count) 1.288 + *aUTF8Count = calculator.Size(); 1.289 + 1.290 + char *result = static_cast<char*> 1.291 + (nsMemory::Alloc(calculator.Size() + 1)); 1.292 + if (!result) 1.293 + return nullptr; 1.294 + 1.295 + ConvertUTF16toUTF8 converter(result); 1.296 + copy_string(aSource.BeginReading(start), aSource.EndReading(end), 1.297 + converter).write_terminator(); 1.298 + NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); 1.299 + 1.300 + return result; 1.301 + } 1.302 + 1.303 +char* 1.304 +ToNewCString( const nsACString& aSource ) 1.305 + { 1.306 + // no conversion needed, just allocate a buffer of the correct length and copy into it 1.307 + 1.308 + char* result = AllocateStringCopy(aSource, (char*)0); 1.309 + if (!result) 1.310 + return nullptr; 1.311 + 1.312 + nsACString::const_iterator fromBegin, fromEnd; 1.313 + char* toBegin = result; 1.314 + *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0); 1.315 + return result; 1.316 + } 1.317 + 1.318 +char16_t* 1.319 +ToNewUnicode( const nsAString& aSource ) 1.320 + { 1.321 + // no conversion needed, just allocate a buffer of the correct length and copy into it 1.322 + 1.323 + char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); 1.324 + if (!result) 1.325 + return nullptr; 1.326 + 1.327 + nsAString::const_iterator fromBegin, fromEnd; 1.328 + char16_t* toBegin = result; 1.329 + *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char16_t(0); 1.330 + return result; 1.331 + } 1.332 + 1.333 +char16_t* 1.334 +ToNewUnicode( const nsACString& aSource ) 1.335 + { 1.336 + char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); 1.337 + if (!result) 1.338 + return nullptr; 1.339 + 1.340 + nsACString::const_iterator fromBegin, fromEnd; 1.341 + LossyConvertEncoding8to16 converter(result); 1.342 + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); 1.343 + return result; 1.344 + } 1.345 + 1.346 +uint32_t 1.347 +CalcUTF8ToUnicodeLength( const nsACString& aSource) 1.348 + { 1.349 + nsACString::const_iterator start, end; 1.350 + CalculateUTF8Length calculator; 1.351 + copy_string(aSource.BeginReading(start), aSource.EndReading(end), 1.352 + calculator); 1.353 + return calculator.Length(); 1.354 + } 1.355 + 1.356 +char16_t* 1.357 +UTF8ToUnicodeBuffer( const nsACString& aSource, char16_t* aBuffer, uint32_t *aUTF16Count ) 1.358 + { 1.359 + nsACString::const_iterator start, end; 1.360 + ConvertUTF8toUTF16 converter(aBuffer); 1.361 + copy_string(aSource.BeginReading(start), 1.362 + aSource.EndReading(end), 1.363 + converter).write_terminator(); 1.364 + if (aUTF16Count) 1.365 + *aUTF16Count = converter.Length(); 1.366 + return aBuffer; 1.367 + } 1.368 + 1.369 +char16_t* 1.370 +UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count ) 1.371 + { 1.372 + const uint32_t length = CalcUTF8ToUnicodeLength(aSource); 1.373 + const size_t buffer_size = (length + 1) * sizeof(char16_t); 1.374 + char16_t *buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size)); 1.375 + if (!buffer) 1.376 + return nullptr; 1.377 + 1.378 + uint32_t copied; 1.379 + UTF8ToUnicodeBuffer(aSource, buffer, &copied); 1.380 + NS_ASSERTION(length == copied, "length mismatch"); 1.381 + 1.382 + if (aUTF16Count) 1.383 + *aUTF16Count = copied; 1.384 + return buffer; 1.385 + } 1.386 + 1.387 +char16_t* 1.388 +CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength ) 1.389 + { 1.390 + nsAString::const_iterator fromBegin, fromEnd; 1.391 + char16_t* toBegin = aDest; 1.392 + copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin); 1.393 + return aDest; 1.394 + } 1.395 + 1.396 +void 1.397 +CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, 1.398 + const nsAString::const_iterator& aSrcEnd, 1.399 + nsAString& aDest ) 1.400 + { 1.401 + nsAString::iterator writer; 1.402 + aDest.SetLength(Distance(aSrcStart, aSrcEnd)); 1.403 + 1.404 + aDest.BeginWriting(writer); 1.405 + nsAString::const_iterator fromBegin(aSrcStart); 1.406 + 1.407 + copy_string(fromBegin, aSrcEnd, writer); 1.408 + } 1.409 + 1.410 +void 1.411 +AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, 1.412 + const nsAString::const_iterator& aSrcEnd, 1.413 + nsAString& aDest ) 1.414 + { 1.415 + nsAString::iterator writer; 1.416 + uint32_t oldLength = aDest.Length(); 1.417 + aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); 1.418 + 1.419 + aDest.BeginWriting(writer).advance(oldLength); 1.420 + nsAString::const_iterator fromBegin(aSrcStart); 1.421 + 1.422 + copy_string(fromBegin, aSrcEnd, writer); 1.423 + } 1.424 + 1.425 +bool 1.426 +IsASCII( const nsAString& aString ) 1.427 + { 1.428 + static const char16_t NOT_ASCII = char16_t(~0x007F); 1.429 + 1.430 + 1.431 + // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character 1.432 + 1.433 + nsAString::const_iterator iter, done_reading; 1.434 + aString.BeginReading(iter); 1.435 + aString.EndReading(done_reading); 1.436 + 1.437 + const char16_t* c = iter.get(); 1.438 + const char16_t* end = done_reading.get(); 1.439 + 1.440 + while ( c < end ) 1.441 + { 1.442 + if ( *c++ & NOT_ASCII ) 1.443 + return false; 1.444 + } 1.445 + 1.446 + return true; 1.447 + } 1.448 + 1.449 +bool 1.450 +IsASCII( const nsACString& aString ) 1.451 + { 1.452 + static const char NOT_ASCII = char(~0x7F); 1.453 + 1.454 + 1.455 + // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character 1.456 + 1.457 + nsACString::const_iterator iter, done_reading; 1.458 + aString.BeginReading(iter); 1.459 + aString.EndReading(done_reading); 1.460 + 1.461 + const char* c = iter.get(); 1.462 + const char* end = done_reading.get(); 1.463 + 1.464 + while ( c < end ) 1.465 + { 1.466 + if ( *c++ & NOT_ASCII ) 1.467 + return false; 1.468 + } 1.469 + 1.470 + return true; 1.471 + } 1.472 + 1.473 +bool 1.474 +IsUTF8( const nsACString& aString, bool aRejectNonChar ) 1.475 + { 1.476 + nsReadingIterator<char> done_reading; 1.477 + aString.EndReading(done_reading); 1.478 + 1.479 + int32_t state = 0; 1.480 + bool overlong = false; 1.481 + bool surrogate = false; 1.482 + bool nonchar = false; 1.483 + uint16_t olupper = 0; // overlong byte upper bound. 1.484 + uint16_t slower = 0; // surrogate byte lower bound. 1.485 + 1.486 + nsReadingIterator<char> iter; 1.487 + aString.BeginReading(iter); 1.488 + 1.489 + const char* ptr = iter.get(); 1.490 + const char* end = done_reading.get(); 1.491 + while ( ptr < end ) 1.492 + { 1.493 + uint8_t c; 1.494 + 1.495 + if (0 == state) 1.496 + { 1.497 + c = *ptr++; 1.498 + 1.499 + if ( UTF8traits::isASCII(c) ) 1.500 + continue; 1.501 + 1.502 + if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong. 1.503 + return false; 1.504 + else if ( UTF8traits::is2byte(c) ) 1.505 + state = 1; 1.506 + else if ( UTF8traits::is3byte(c) ) 1.507 + { 1.508 + state = 2; 1.509 + if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF] 1.510 + { 1.511 + overlong = true; 1.512 + olupper = 0x9F; 1.513 + } 1.514 + else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint 1.515 + { 1.516 + surrogate = true; 1.517 + slower = 0xA0; 1.518 + } 1.519 + else if ( c == 0xEF ) // EF BF [BE-BF] : non-character 1.520 + nonchar = true; 1.521 + } 1.522 + else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090) 1.523 + { 1.524 + state = 3; 1.525 + nonchar = true; 1.526 + if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2} 1.527 + { 1.528 + overlong = true; 1.529 + olupper = 0x8F; 1.530 + } 1.531 + else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF] 1.532 + { 1.533 + // actually not surrogates but codepoints beyond 0x10FFFF 1.534 + surrogate = true; 1.535 + slower = 0x90; 1.536 + } 1.537 + } 1.538 + else 1.539 + return false; // Not UTF-8 string 1.540 + } 1.541 + 1.542 + if (nonchar && !aRejectNonChar) 1.543 + nonchar = false; 1.544 + 1.545 + while ( ptr < end && state ) 1.546 + { 1.547 + c = *ptr++; 1.548 + --state; 1.549 + 1.550 + // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF] 1.551 + if ( nonchar && 1.552 + ( ( !state && c < 0xBE ) || 1.553 + ( state == 1 && c != 0xBF ) || 1.554 + ( state == 2 && 0x0F != (0x0F & c) ))) 1.555 + nonchar = false; 1.556 + 1.557 + if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) || 1.558 + ( surrogate && slower <= c ) || ( nonchar && !state )) 1.559 + return false; // Not UTF-8 string 1.560 + 1.561 + overlong = surrogate = false; 1.562 + } 1.563 + } 1.564 + return !state; // state != 0 at the end indicates an invalid UTF-8 seq. 1.565 + } 1.566 + 1.567 + /** 1.568 + * A character sink for in-place case conversion. 1.569 + */ 1.570 +class ConvertToUpperCase 1.571 + { 1.572 + public: 1.573 + typedef char value_type; 1.574 + 1.575 + uint32_t 1.576 + write( const char* aSource, uint32_t aSourceLength ) 1.577 + { 1.578 + char* cp = const_cast<char*>(aSource); 1.579 + const char* end = aSource + aSourceLength; 1.580 + while (cp != end) { 1.581 + char ch = *cp; 1.582 + if ((ch >= 'a') && (ch <= 'z')) 1.583 + *cp = ch - ('a' - 'A'); 1.584 + ++cp; 1.585 + } 1.586 + return aSourceLength; 1.587 + } 1.588 + }; 1.589 + 1.590 +void 1.591 +ToUpperCase( nsCSubstring& aCString ) 1.592 + { 1.593 + ConvertToUpperCase converter; 1.594 + char* start; 1.595 + converter.write(aCString.BeginWriting(start), aCString.Length()); 1.596 + } 1.597 + 1.598 + /** 1.599 + * A character sink for copying with case conversion. 1.600 + */ 1.601 +class CopyToUpperCase 1.602 + { 1.603 + public: 1.604 + typedef char value_type; 1.605 + 1.606 + CopyToUpperCase( nsACString::iterator& aDestIter ) 1.607 + : mIter(aDestIter) 1.608 + { 1.609 + } 1.610 + 1.611 + uint32_t 1.612 + write( const char* aSource, uint32_t aSourceLength ) 1.613 + { 1.614 + uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); 1.615 + char* cp = mIter.get(); 1.616 + const char* end = aSource + len; 1.617 + while (aSource != end) { 1.618 + char ch = *aSource; 1.619 + if ((ch >= 'a') && (ch <= 'z')) 1.620 + *cp = ch - ('a' - 'A'); 1.621 + else 1.622 + *cp = ch; 1.623 + ++aSource; 1.624 + ++cp; 1.625 + } 1.626 + mIter.advance(len); 1.627 + return len; 1.628 + } 1.629 + 1.630 + protected: 1.631 + nsACString::iterator& mIter; 1.632 + }; 1.633 + 1.634 +void 1.635 +ToUpperCase( const nsACString& aSource, nsACString& aDest ) 1.636 + { 1.637 + nsACString::const_iterator fromBegin, fromEnd; 1.638 + nsACString::iterator toBegin; 1.639 + aDest.SetLength(aSource.Length()); 1.640 + 1.641 + CopyToUpperCase converter(aDest.BeginWriting(toBegin)); 1.642 + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); 1.643 + } 1.644 + 1.645 + /** 1.646 + * A character sink for case conversion. 1.647 + */ 1.648 +class ConvertToLowerCase 1.649 + { 1.650 + public: 1.651 + typedef char value_type; 1.652 + 1.653 + uint32_t 1.654 + write( const char* aSource, uint32_t aSourceLength ) 1.655 + { 1.656 + char* cp = const_cast<char*>(aSource); 1.657 + const char* end = aSource + aSourceLength; 1.658 + while (cp != end) { 1.659 + char ch = *cp; 1.660 + if ((ch >= 'A') && (ch <= 'Z')) 1.661 + *cp = ch + ('a' - 'A'); 1.662 + ++cp; 1.663 + } 1.664 + return aSourceLength; 1.665 + } 1.666 + }; 1.667 + 1.668 +void 1.669 +ToLowerCase( nsCSubstring& aCString ) 1.670 + { 1.671 + ConvertToLowerCase converter; 1.672 + char* start; 1.673 + converter.write(aCString.BeginWriting(start), aCString.Length()); 1.674 + } 1.675 + 1.676 + /** 1.677 + * A character sink for copying with case conversion. 1.678 + */ 1.679 +class CopyToLowerCase 1.680 + { 1.681 + public: 1.682 + typedef char value_type; 1.683 + 1.684 + CopyToLowerCase( nsACString::iterator& aDestIter ) 1.685 + : mIter(aDestIter) 1.686 + { 1.687 + } 1.688 + 1.689 + uint32_t 1.690 + write( const char* aSource, uint32_t aSourceLength ) 1.691 + { 1.692 + uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); 1.693 + char* cp = mIter.get(); 1.694 + const char* end = aSource + len; 1.695 + while (aSource != end) { 1.696 + char ch = *aSource; 1.697 + if ((ch >= 'A') && (ch <= 'Z')) 1.698 + *cp = ch + ('a' - 'A'); 1.699 + else 1.700 + *cp = ch; 1.701 + ++aSource; 1.702 + ++cp; 1.703 + } 1.704 + mIter.advance(len); 1.705 + return len; 1.706 + } 1.707 + 1.708 + protected: 1.709 + nsACString::iterator& mIter; 1.710 + }; 1.711 + 1.712 +void 1.713 +ToLowerCase( const nsACString& aSource, nsACString& aDest ) 1.714 + { 1.715 + nsACString::const_iterator fromBegin, fromEnd; 1.716 + nsACString::iterator toBegin; 1.717 + aDest.SetLength(aSource.Length()); 1.718 + 1.719 + CopyToLowerCase converter(aDest.BeginWriting(toBegin)); 1.720 + copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); 1.721 + } 1.722 + 1.723 +bool 1.724 +ParseString(const nsACString& aSource, char aDelimiter, 1.725 + nsTArray<nsCString>& aArray) 1.726 + { 1.727 + nsACString::const_iterator start, end; 1.728 + aSource.BeginReading(start); 1.729 + aSource.EndReading(end); 1.730 + 1.731 + uint32_t oldLength = aArray.Length(); 1.732 + 1.733 + for (;;) 1.734 + { 1.735 + nsACString::const_iterator delimiter = start; 1.736 + FindCharInReadable(aDelimiter, delimiter, end); 1.737 + 1.738 + if (delimiter != start) 1.739 + { 1.740 + if (!aArray.AppendElement(Substring(start, delimiter))) 1.741 + { 1.742 + aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength); 1.743 + return false; 1.744 + } 1.745 + } 1.746 + 1.747 + if (delimiter == end) 1.748 + break; 1.749 + start = ++delimiter; 1.750 + if (start == end) 1.751 + break; 1.752 + } 1.753 + 1.754 + return true; 1.755 + } 1.756 + 1.757 +template <class StringT, class IteratorT, class Comparator> 1.758 +bool 1.759 +FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare ) 1.760 + { 1.761 + bool found_it = false; 1.762 + 1.763 + // only bother searching at all if we're given a non-empty range to search 1.764 + if ( aSearchStart != aSearchEnd ) 1.765 + { 1.766 + IteratorT aPatternStart, aPatternEnd; 1.767 + aPattern.BeginReading(aPatternStart); 1.768 + aPattern.EndReading(aPatternEnd); 1.769 + 1.770 + // outer loop keeps searching till we find it or run out of string to search 1.771 + while ( !found_it ) 1.772 + { 1.773 + // fast inner loop (that's what it's called, not what it is) looks for a potential match 1.774 + while ( aSearchStart != aSearchEnd && 1.775 + compare(aPatternStart.get(), aSearchStart.get(), 1, 1) ) 1.776 + ++aSearchStart; 1.777 + 1.778 + // if we broke out of the `fast' loop because we're out of string ... we're done: no match 1.779 + if ( aSearchStart == aSearchEnd ) 1.780 + break; 1.781 + 1.782 + // otherwise, we're at a potential match, let's see if we really hit one 1.783 + IteratorT testPattern(aPatternStart); 1.784 + IteratorT testSearch(aSearchStart); 1.785 + 1.786 + // slow inner loop verifies the potential match (found by the `fast' loop) at the current position 1.787 + for(;;) 1.788 + { 1.789 + // we already compared the first character in the outer loop, 1.790 + // so we'll advance before the next comparison 1.791 + ++testPattern; 1.792 + ++testSearch; 1.793 + 1.794 + // if we verified all the way to the end of the pattern, then we found it! 1.795 + if ( testPattern == aPatternEnd ) 1.796 + { 1.797 + found_it = true; 1.798 + aSearchEnd = testSearch; // return the exact found range through the parameters 1.799 + break; 1.800 + } 1.801 + 1.802 + // if we got to end of the string we're searching before we hit the end of the 1.803 + // pattern, we'll never find what we're looking for 1.804 + if ( testSearch == aSearchEnd ) 1.805 + { 1.806 + aSearchStart = aSearchEnd; 1.807 + break; 1.808 + } 1.809 + 1.810 + // else if we mismatched ... it's time to advance to the next search position 1.811 + // and get back into the `fast' loop 1.812 + if ( compare(testPattern.get(), testSearch.get(), 1, 1) ) 1.813 + { 1.814 + ++aSearchStart; 1.815 + break; 1.816 + } 1.817 + } 1.818 + } 1.819 + } 1.820 + 1.821 + return found_it; 1.822 + } 1.823 + 1.824 + /** 1.825 + * This searches the entire string from right to left, and returns the first match found, if any. 1.826 + */ 1.827 +template <class StringT, class IteratorT, class Comparator> 1.828 +bool 1.829 +RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare ) 1.830 + { 1.831 + IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; 1.832 + aPattern.BeginReading(patternStart); 1.833 + aPattern.EndReading(patternEnd); 1.834 + 1.835 + // Point to the last character in the pattern 1.836 + --patternEnd; 1.837 + // outer loop keeps searching till we run out of string to search 1.838 + while ( aSearchStart != searchEnd ) 1.839 + { 1.840 + // Point to the end position of the next possible match 1.841 + --searchEnd; 1.842 + 1.843 + // Check last character, if a match, explore further from here 1.844 + if ( compare(patternEnd.get(), searchEnd.get(), 1, 1) == 0 ) 1.845 + { 1.846 + // We're at a potential match, let's see if we really hit one 1.847 + IteratorT testPattern(patternEnd); 1.848 + IteratorT testSearch(searchEnd); 1.849 + 1.850 + // inner loop verifies the potential match at the current position 1.851 + do 1.852 + { 1.853 + // if we verified all the way to the end of the pattern, then we found it! 1.854 + if ( testPattern == patternStart ) 1.855 + { 1.856 + aSearchStart = testSearch; // point to start of match 1.857 + aSearchEnd = ++searchEnd; // point to end of match 1.858 + return true; 1.859 + } 1.860 + 1.861 + // if we got to end of the string we're searching before we hit the end of the 1.862 + // pattern, we'll never find what we're looking for 1.863 + if ( testSearch == aSearchStart ) 1.864 + { 1.865 + aSearchStart = aSearchEnd; 1.866 + return false; 1.867 + } 1.868 + 1.869 + // test previous character for a match 1.870 + --testPattern; 1.871 + --testSearch; 1.872 + } 1.873 + while ( compare(testPattern.get(), testSearch.get(), 1, 1) == 0 ); 1.874 + } 1.875 + } 1.876 + 1.877 + aSearchStart = aSearchEnd; 1.878 + return false; 1.879 + } 1.880 + 1.881 +bool 1.882 +FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator ) 1.883 + { 1.884 + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1.885 + } 1.886 + 1.887 +bool 1.888 +FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) 1.889 + { 1.890 + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1.891 + } 1.892 + 1.893 +bool 1.894 +CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd ) 1.895 + { 1.896 + return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator()); 1.897 + } 1.898 + 1.899 +bool 1.900 +RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator) 1.901 + { 1.902 + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1.903 + } 1.904 + 1.905 +bool 1.906 +RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) 1.907 + { 1.908 + return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1.909 + } 1.910 + 1.911 +bool 1.912 +FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ) 1.913 + { 1.914 + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); 1.915 + 1.916 + const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); 1.917 + if ( charFoundAt ) { 1.918 + aSearchStart.advance( charFoundAt - aSearchStart.get() ); 1.919 + return true; 1.920 + } 1.921 + 1.922 + aSearchStart.advance(fragmentLength); 1.923 + return false; 1.924 + } 1.925 + 1.926 +bool 1.927 +FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ) 1.928 + { 1.929 + int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); 1.930 + 1.931 + const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); 1.932 + if ( charFoundAt ) { 1.933 + aSearchStart.advance( charFoundAt - aSearchStart.get() ); 1.934 + return true; 1.935 + } 1.936 + 1.937 + aSearchStart.advance(fragmentLength); 1.938 + return false; 1.939 + } 1.940 + 1.941 +uint32_t 1.942 +CountCharInReadable( const nsAString& aStr, 1.943 + char16_t aChar ) 1.944 +{ 1.945 + uint32_t count = 0; 1.946 + nsAString::const_iterator begin, end; 1.947 + 1.948 + aStr.BeginReading(begin); 1.949 + aStr.EndReading(end); 1.950 + 1.951 + while (begin != end) { 1.952 + if (*begin == aChar) { 1.953 + ++count; 1.954 + } 1.955 + ++begin; 1.956 + } 1.957 + 1.958 + return count; 1.959 +} 1.960 + 1.961 +uint32_t 1.962 +CountCharInReadable( const nsACString& aStr, 1.963 + char aChar ) 1.964 +{ 1.965 + uint32_t count = 0; 1.966 + nsACString::const_iterator begin, end; 1.967 + 1.968 + aStr.BeginReading(begin); 1.969 + aStr.EndReading(end); 1.970 + 1.971 + while (begin != end) { 1.972 + if (*begin == aChar) { 1.973 + ++count; 1.974 + } 1.975 + ++begin; 1.976 + } 1.977 + 1.978 + return count; 1.979 +} 1.980 + 1.981 +bool 1.982 +StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, 1.983 + const nsStringComparator& aComparator ) 1.984 + { 1.985 + nsAString::size_type src_len = aSource.Length(), 1.986 + sub_len = aSubstring.Length(); 1.987 + if (sub_len > src_len) 1.988 + return false; 1.989 + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); 1.990 + } 1.991 + 1.992 +bool 1.993 +StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, 1.994 + const nsCStringComparator& aComparator ) 1.995 + { 1.996 + nsACString::size_type src_len = aSource.Length(), 1.997 + sub_len = aSubstring.Length(); 1.998 + if (sub_len > src_len) 1.999 + return false; 1.1000 + return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); 1.1001 + } 1.1002 + 1.1003 +bool 1.1004 +StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, 1.1005 + const nsStringComparator& aComparator ) 1.1006 + { 1.1007 + nsAString::size_type src_len = aSource.Length(), 1.1008 + sub_len = aSubstring.Length(); 1.1009 + if (sub_len > src_len) 1.1010 + return false; 1.1011 + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, 1.1012 + aComparator); 1.1013 + } 1.1014 + 1.1015 +bool 1.1016 +StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, 1.1017 + const nsCStringComparator& aComparator ) 1.1018 + { 1.1019 + nsACString::size_type src_len = aSource.Length(), 1.1020 + sub_len = aSubstring.Length(); 1.1021 + if (sub_len > src_len) 1.1022 + return false; 1.1023 + return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, 1.1024 + aComparator); 1.1025 + } 1.1026 + 1.1027 + 1.1028 + 1.1029 +static const char16_t empty_buffer[1] = { '\0' }; 1.1030 + 1.1031 +const nsAFlatString& 1.1032 +EmptyString() 1.1033 + { 1.1034 + static const nsDependentString sEmpty(empty_buffer); 1.1035 + 1.1036 + return sEmpty; 1.1037 + } 1.1038 + 1.1039 +const nsAFlatCString& 1.1040 +EmptyCString() 1.1041 + { 1.1042 + static const nsDependentCString sEmpty((const char *)empty_buffer); 1.1043 + 1.1044 + return sEmpty; 1.1045 + } 1.1046 + 1.1047 +const nsAFlatString& 1.1048 +NullString() 1.1049 + { 1.1050 + static const nsXPIDLString sNull; 1.1051 + 1.1052 + return sNull; 1.1053 + } 1.1054 + 1.1055 +const nsAFlatCString& 1.1056 +NullCString() 1.1057 + { 1.1058 + static const nsXPIDLCString sNull; 1.1059 + 1.1060 + return sNull; 1.1061 + } 1.1062 + 1.1063 +int32_t 1.1064 +CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, 1.1065 + const nsASingleFragmentString& aUTF16String) 1.1066 + { 1.1067 + static const uint32_t NOT_ASCII = uint32_t(~0x7F); 1.1068 + 1.1069 + const char *u8, *u8end; 1.1070 + aUTF8String.BeginReading(u8); 1.1071 + aUTF8String.EndReading(u8end); 1.1072 + 1.1073 + const char16_t *u16, *u16end; 1.1074 + aUTF16String.BeginReading(u16); 1.1075 + aUTF16String.EndReading(u16end); 1.1076 + 1.1077 + while (u8 != u8end && u16 != u16end) 1.1078 + { 1.1079 + // Cast away the signedness of *u8 to prevent signextension when 1.1080 + // converting to uint32_t 1.1081 + uint32_t c8_32 = (uint8_t)*u8; 1.1082 + 1.1083 + if (c8_32 & NOT_ASCII) 1.1084 + { 1.1085 + bool err; 1.1086 + c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); 1.1087 + if (err) 1.1088 + return INT32_MIN; 1.1089 + 1.1090 + uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); 1.1091 + // The above UTF16CharEnumerator::NextChar() calls can 1.1092 + // fail, but if it does for anything other than no data to 1.1093 + // look at (which can't happen here), it returns the 1.1094 + // Unicode replacement character 0xFFFD for the invalid 1.1095 + // data they were fed. Ignore that error and treat invalid 1.1096 + // UTF16 as 0xFFFD. 1.1097 + // 1.1098 + // This matches what our UTF16 to UTF8 conversion code 1.1099 + // does, and thus a UTF8 string that came from an invalid 1.1100 + // UTF16 string will compare equal to the invalid UTF16 1.1101 + // string it came from. Same is true for any other UTF16 1.1102 + // string differs only in the invalid part of the string. 1.1103 + 1.1104 + if (c8_32 != c16_32) 1.1105 + return c8_32 < c16_32 ? -1 : 1; 1.1106 + } 1.1107 + else 1.1108 + { 1.1109 + if (c8_32 != *u16) 1.1110 + return c8_32 > *u16 ? 1 : -1; 1.1111 + 1.1112 + ++u8; 1.1113 + ++u16; 1.1114 + } 1.1115 + } 1.1116 + 1.1117 + if (u8 != u8end) 1.1118 + { 1.1119 + // We get to the end of the UTF16 string, but no to the end of 1.1120 + // the UTF8 string. The UTF8 string is longer than the UTF16 1.1121 + // string 1.1122 + 1.1123 + return 1; 1.1124 + } 1.1125 + 1.1126 + if (u16 != u16end) 1.1127 + { 1.1128 + // We get to the end of the UTF8 string, but no to the end of 1.1129 + // the UTF16 string. The UTF16 string is longer than the UTF8 1.1130 + // string 1.1131 + 1.1132 + return -1; 1.1133 + } 1.1134 + 1.1135 + // The two strings match. 1.1136 + 1.1137 + return 0; 1.1138 + } 1.1139 + 1.1140 +void 1.1141 +AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) 1.1142 + { 1.1143 + NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); 1.1144 + if (IS_IN_BMP(aSource)) 1.1145 + { 1.1146 + aDest.Append(char16_t(aSource)); 1.1147 + } 1.1148 + else 1.1149 + { 1.1150 + aDest.Append(H_SURROGATE(aSource)); 1.1151 + aDest.Append(L_SURROGATE(aSource)); 1.1152 + } 1.1153 + }