Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include "nsReadableUtils.h" |
michael@0 | 7 | |
michael@0 | 8 | #include "nsMemory.h" |
michael@0 | 9 | #include "nsString.h" |
michael@0 | 10 | #include "nsTArray.h" |
michael@0 | 11 | #include "nsUTF8Utils.h" |
michael@0 | 12 | |
michael@0 | 13 | void |
michael@0 | 14 | LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ) |
michael@0 | 15 | { |
michael@0 | 16 | aDest.Truncate(); |
michael@0 | 17 | LossyAppendUTF16toASCII(aSource, aDest); |
michael@0 | 18 | } |
michael@0 | 19 | |
michael@0 | 20 | void |
michael@0 | 21 | CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) |
michael@0 | 22 | { |
michael@0 | 23 | aDest.Truncate(); |
michael@0 | 24 | AppendASCIItoUTF16(aSource, aDest); |
michael@0 | 25 | } |
michael@0 | 26 | |
michael@0 | 27 | void |
michael@0 | 28 | LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ) |
michael@0 | 29 | { |
michael@0 | 30 | aDest.Truncate(); |
michael@0 | 31 | if (aSource) { |
michael@0 | 32 | LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); |
michael@0 | 33 | } |
michael@0 | 34 | } |
michael@0 | 35 | |
michael@0 | 36 | void |
michael@0 | 37 | CopyASCIItoUTF16( const char* aSource, nsAString& aDest ) |
michael@0 | 38 | { |
michael@0 | 39 | aDest.Truncate(); |
michael@0 | 40 | if (aSource) { |
michael@0 | 41 | AppendASCIItoUTF16(nsDependentCString(aSource), aDest); |
michael@0 | 42 | } |
michael@0 | 43 | } |
michael@0 | 44 | |
michael@0 | 45 | void |
michael@0 | 46 | CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) |
michael@0 | 47 | { |
michael@0 | 48 | aDest.Truncate(); |
michael@0 | 49 | AppendUTF16toUTF8(aSource, aDest); |
michael@0 | 50 | } |
michael@0 | 51 | |
michael@0 | 52 | void |
michael@0 | 53 | CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) |
michael@0 | 54 | { |
michael@0 | 55 | aDest.Truncate(); |
michael@0 | 56 | AppendUTF8toUTF16(aSource, aDest); |
michael@0 | 57 | } |
michael@0 | 58 | |
michael@0 | 59 | void |
michael@0 | 60 | CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) |
michael@0 | 61 | { |
michael@0 | 62 | aDest.Truncate(); |
michael@0 | 63 | AppendUTF16toUTF8(aSource, aDest); |
michael@0 | 64 | } |
michael@0 | 65 | |
michael@0 | 66 | void |
michael@0 | 67 | CopyUTF8toUTF16( const char* aSource, nsAString& aDest ) |
michael@0 | 68 | { |
michael@0 | 69 | aDest.Truncate(); |
michael@0 | 70 | AppendUTF8toUTF16(aSource, aDest); |
michael@0 | 71 | } |
michael@0 | 72 | |
michael@0 | 73 | void |
michael@0 | 74 | LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ) |
michael@0 | 75 | { |
michael@0 | 76 | uint32_t old_dest_length = aDest.Length(); |
michael@0 | 77 | aDest.SetLength(old_dest_length + aSource.Length()); |
michael@0 | 78 | |
michael@0 | 79 | nsAString::const_iterator fromBegin, fromEnd; |
michael@0 | 80 | |
michael@0 | 81 | nsACString::iterator dest; |
michael@0 | 82 | aDest.BeginWriting(dest); |
michael@0 | 83 | |
michael@0 | 84 | dest.advance(old_dest_length); |
michael@0 | 85 | |
michael@0 | 86 | // right now, this won't work on multi-fragment destinations |
michael@0 | 87 | LossyConvertEncoding16to8 converter(dest.get()); |
michael@0 | 88 | |
michael@0 | 89 | copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
michael@0 | 90 | } |
michael@0 | 91 | |
michael@0 | 92 | void |
michael@0 | 93 | AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) |
michael@0 | 94 | { |
michael@0 | 95 | if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) { |
michael@0 | 96 | NS_ABORT_OOM(aDest.Length() + aSource.Length()); |
michael@0 | 97 | } |
michael@0 | 98 | } |
michael@0 | 99 | |
michael@0 | 100 | bool |
michael@0 | 101 | AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, |
michael@0 | 102 | const mozilla::fallible_t& ) |
michael@0 | 103 | { |
michael@0 | 104 | uint32_t old_dest_length = aDest.Length(); |
michael@0 | 105 | if (!aDest.SetLength(old_dest_length + aSource.Length(), mozilla::fallible_t())) { |
michael@0 | 106 | return false; |
michael@0 | 107 | } |
michael@0 | 108 | |
michael@0 | 109 | nsACString::const_iterator fromBegin, fromEnd; |
michael@0 | 110 | |
michael@0 | 111 | nsAString::iterator dest; |
michael@0 | 112 | aDest.BeginWriting(dest); |
michael@0 | 113 | |
michael@0 | 114 | dest.advance(old_dest_length); |
michael@0 | 115 | |
michael@0 | 116 | // right now, this won't work on multi-fragment destinations |
michael@0 | 117 | LossyConvertEncoding8to16 converter(dest.get()); |
michael@0 | 118 | |
michael@0 | 119 | copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
michael@0 | 120 | return true; |
michael@0 | 121 | } |
michael@0 | 122 | |
michael@0 | 123 | void |
michael@0 | 124 | LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ) |
michael@0 | 125 | { |
michael@0 | 126 | if (aSource) { |
michael@0 | 127 | LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); |
michael@0 | 128 | } |
michael@0 | 129 | } |
michael@0 | 130 | |
michael@0 | 131 | void |
michael@0 | 132 | AppendASCIItoUTF16( const char* aSource, nsAString& aDest ) |
michael@0 | 133 | { |
michael@0 | 134 | if (aSource) { |
michael@0 | 135 | AppendASCIItoUTF16(nsDependentCString(aSource), aDest); |
michael@0 | 136 | } |
michael@0 | 137 | } |
michael@0 | 138 | |
michael@0 | 139 | void |
michael@0 | 140 | AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) |
michael@0 | 141 | { |
michael@0 | 142 | if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) { |
michael@0 | 143 | NS_ABORT_OOM(aDest.Length() + aSource.Length()); |
michael@0 | 144 | } |
michael@0 | 145 | } |
michael@0 | 146 | |
michael@0 | 147 | bool |
michael@0 | 148 | AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, |
michael@0 | 149 | const mozilla::fallible_t& ) |
michael@0 | 150 | { |
michael@0 | 151 | nsAString::const_iterator source_start, source_end; |
michael@0 | 152 | CalculateUTF8Size calculator; |
michael@0 | 153 | copy_string(aSource.BeginReading(source_start), |
michael@0 | 154 | aSource.EndReading(source_end), calculator); |
michael@0 | 155 | |
michael@0 | 156 | uint32_t count = calculator.Size(); |
michael@0 | 157 | |
michael@0 | 158 | if (count) |
michael@0 | 159 | { |
michael@0 | 160 | uint32_t old_dest_length = aDest.Length(); |
michael@0 | 161 | |
michael@0 | 162 | // Grow the buffer if we need to. |
michael@0 | 163 | if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { |
michael@0 | 164 | return false; |
michael@0 | 165 | } |
michael@0 | 166 | |
michael@0 | 167 | // All ready? Time to convert |
michael@0 | 168 | |
michael@0 | 169 | ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length); |
michael@0 | 170 | copy_string(aSource.BeginReading(source_start), |
michael@0 | 171 | aSource.EndReading(source_end), converter); |
michael@0 | 172 | |
michael@0 | 173 | NS_ASSERTION(converter.Size() == count, |
michael@0 | 174 | "Unexpected disparity between CalculateUTF8Size and " |
michael@0 | 175 | "ConvertUTF16toUTF8"); |
michael@0 | 176 | } |
michael@0 | 177 | |
michael@0 | 178 | return true; |
michael@0 | 179 | } |
michael@0 | 180 | |
michael@0 | 181 | void |
michael@0 | 182 | AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) |
michael@0 | 183 | { |
michael@0 | 184 | if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) { |
michael@0 | 185 | NS_ABORT_OOM(aDest.Length() + aSource.Length()); |
michael@0 | 186 | } |
michael@0 | 187 | } |
michael@0 | 188 | |
michael@0 | 189 | bool |
michael@0 | 190 | AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, |
michael@0 | 191 | const mozilla::fallible_t& ) |
michael@0 | 192 | { |
michael@0 | 193 | nsACString::const_iterator source_start, source_end; |
michael@0 | 194 | CalculateUTF8Length calculator; |
michael@0 | 195 | copy_string(aSource.BeginReading(source_start), |
michael@0 | 196 | aSource.EndReading(source_end), calculator); |
michael@0 | 197 | |
michael@0 | 198 | uint32_t count = calculator.Length(); |
michael@0 | 199 | |
michael@0 | 200 | // Avoid making the string mutable if we're appending an empty string |
michael@0 | 201 | if (count) |
michael@0 | 202 | { |
michael@0 | 203 | uint32_t old_dest_length = aDest.Length(); |
michael@0 | 204 | |
michael@0 | 205 | // Grow the buffer if we need to. |
michael@0 | 206 | if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { |
michael@0 | 207 | return false; |
michael@0 | 208 | } |
michael@0 | 209 | |
michael@0 | 210 | // All ready? Time to convert |
michael@0 | 211 | |
michael@0 | 212 | ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); |
michael@0 | 213 | copy_string(aSource.BeginReading(source_start), |
michael@0 | 214 | aSource.EndReading(source_end), converter); |
michael@0 | 215 | |
michael@0 | 216 | NS_ASSERTION(converter.ErrorEncountered() || |
michael@0 | 217 | converter.Length() == count, |
michael@0 | 218 | "CalculateUTF8Length produced the wrong length"); |
michael@0 | 219 | |
michael@0 | 220 | if (converter.ErrorEncountered()) |
michael@0 | 221 | { |
michael@0 | 222 | NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); |
michael@0 | 223 | aDest.SetLength(old_dest_length); |
michael@0 | 224 | } |
michael@0 | 225 | } |
michael@0 | 226 | |
michael@0 | 227 | return true; |
michael@0 | 228 | } |
michael@0 | 229 | |
michael@0 | 230 | void |
michael@0 | 231 | AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) |
michael@0 | 232 | { |
michael@0 | 233 | if (aSource) { |
michael@0 | 234 | AppendUTF16toUTF8(nsDependentString(aSource), aDest); |
michael@0 | 235 | } |
michael@0 | 236 | } |
michael@0 | 237 | |
michael@0 | 238 | void |
michael@0 | 239 | AppendUTF8toUTF16( const char* aSource, nsAString& aDest ) |
michael@0 | 240 | { |
michael@0 | 241 | if (aSource) { |
michael@0 | 242 | AppendUTF8toUTF16(nsDependentCString(aSource), aDest); |
michael@0 | 243 | } |
michael@0 | 244 | } |
michael@0 | 245 | |
michael@0 | 246 | |
michael@0 | 247 | /** |
michael@0 | 248 | * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). |
michael@0 | 249 | * |
michael@0 | 250 | * @param aSource an string you will eventually be making a copy of |
michael@0 | 251 | * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|. |
michael@0 | 252 | * |
michael@0 | 253 | */ |
michael@0 | 254 | template <class FromStringT, class ToCharT> |
michael@0 | 255 | inline |
michael@0 | 256 | ToCharT* |
michael@0 | 257 | AllocateStringCopy( const FromStringT& aSource, ToCharT* ) |
michael@0 | 258 | { |
michael@0 | 259 | return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT))); |
michael@0 | 260 | } |
michael@0 | 261 | |
michael@0 | 262 | |
michael@0 | 263 | char* |
michael@0 | 264 | ToNewCString( const nsAString& aSource ) |
michael@0 | 265 | { |
michael@0 | 266 | char* result = AllocateStringCopy(aSource, (char*)0); |
michael@0 | 267 | if (!result) |
michael@0 | 268 | return nullptr; |
michael@0 | 269 | |
michael@0 | 270 | nsAString::const_iterator fromBegin, fromEnd; |
michael@0 | 271 | LossyConvertEncoding16to8 converter(result); |
michael@0 | 272 | copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); |
michael@0 | 273 | return result; |
michael@0 | 274 | } |
michael@0 | 275 | |
michael@0 | 276 | char* |
michael@0 | 277 | ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count ) |
michael@0 | 278 | { |
michael@0 | 279 | nsAString::const_iterator start, end; |
michael@0 | 280 | CalculateUTF8Size calculator; |
michael@0 | 281 | copy_string(aSource.BeginReading(start), aSource.EndReading(end), |
michael@0 | 282 | calculator); |
michael@0 | 283 | |
michael@0 | 284 | if (aUTF8Count) |
michael@0 | 285 | *aUTF8Count = calculator.Size(); |
michael@0 | 286 | |
michael@0 | 287 | char *result = static_cast<char*> |
michael@0 | 288 | (nsMemory::Alloc(calculator.Size() + 1)); |
michael@0 | 289 | if (!result) |
michael@0 | 290 | return nullptr; |
michael@0 | 291 | |
michael@0 | 292 | ConvertUTF16toUTF8 converter(result); |
michael@0 | 293 | copy_string(aSource.BeginReading(start), aSource.EndReading(end), |
michael@0 | 294 | converter).write_terminator(); |
michael@0 | 295 | NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); |
michael@0 | 296 | |
michael@0 | 297 | return result; |
michael@0 | 298 | } |
michael@0 | 299 | |
michael@0 | 300 | char* |
michael@0 | 301 | ToNewCString( const nsACString& aSource ) |
michael@0 | 302 | { |
michael@0 | 303 | // no conversion needed, just allocate a buffer of the correct length and copy into it |
michael@0 | 304 | |
michael@0 | 305 | char* result = AllocateStringCopy(aSource, (char*)0); |
michael@0 | 306 | if (!result) |
michael@0 | 307 | return nullptr; |
michael@0 | 308 | |
michael@0 | 309 | nsACString::const_iterator fromBegin, fromEnd; |
michael@0 | 310 | char* toBegin = result; |
michael@0 | 311 | *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0); |
michael@0 | 312 | return result; |
michael@0 | 313 | } |
michael@0 | 314 | |
michael@0 | 315 | char16_t* |
michael@0 | 316 | ToNewUnicode( const nsAString& aSource ) |
michael@0 | 317 | { |
michael@0 | 318 | // no conversion needed, just allocate a buffer of the correct length and copy into it |
michael@0 | 319 | |
michael@0 | 320 | char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); |
michael@0 | 321 | if (!result) |
michael@0 | 322 | return nullptr; |
michael@0 | 323 | |
michael@0 | 324 | nsAString::const_iterator fromBegin, fromEnd; |
michael@0 | 325 | char16_t* toBegin = result; |
michael@0 | 326 | *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char16_t(0); |
michael@0 | 327 | return result; |
michael@0 | 328 | } |
michael@0 | 329 | |
michael@0 | 330 | char16_t* |
michael@0 | 331 | ToNewUnicode( const nsACString& aSource ) |
michael@0 | 332 | { |
michael@0 | 333 | char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); |
michael@0 | 334 | if (!result) |
michael@0 | 335 | return nullptr; |
michael@0 | 336 | |
michael@0 | 337 | nsACString::const_iterator fromBegin, fromEnd; |
michael@0 | 338 | LossyConvertEncoding8to16 converter(result); |
michael@0 | 339 | copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); |
michael@0 | 340 | return result; |
michael@0 | 341 | } |
michael@0 | 342 | |
michael@0 | 343 | uint32_t |
michael@0 | 344 | CalcUTF8ToUnicodeLength( const nsACString& aSource) |
michael@0 | 345 | { |
michael@0 | 346 | nsACString::const_iterator start, end; |
michael@0 | 347 | CalculateUTF8Length calculator; |
michael@0 | 348 | copy_string(aSource.BeginReading(start), aSource.EndReading(end), |
michael@0 | 349 | calculator); |
michael@0 | 350 | return calculator.Length(); |
michael@0 | 351 | } |
michael@0 | 352 | |
michael@0 | 353 | char16_t* |
michael@0 | 354 | UTF8ToUnicodeBuffer( const nsACString& aSource, char16_t* aBuffer, uint32_t *aUTF16Count ) |
michael@0 | 355 | { |
michael@0 | 356 | nsACString::const_iterator start, end; |
michael@0 | 357 | ConvertUTF8toUTF16 converter(aBuffer); |
michael@0 | 358 | copy_string(aSource.BeginReading(start), |
michael@0 | 359 | aSource.EndReading(end), |
michael@0 | 360 | converter).write_terminator(); |
michael@0 | 361 | if (aUTF16Count) |
michael@0 | 362 | *aUTF16Count = converter.Length(); |
michael@0 | 363 | return aBuffer; |
michael@0 | 364 | } |
michael@0 | 365 | |
michael@0 | 366 | char16_t* |
michael@0 | 367 | UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count ) |
michael@0 | 368 | { |
michael@0 | 369 | const uint32_t length = CalcUTF8ToUnicodeLength(aSource); |
michael@0 | 370 | const size_t buffer_size = (length + 1) * sizeof(char16_t); |
michael@0 | 371 | char16_t *buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size)); |
michael@0 | 372 | if (!buffer) |
michael@0 | 373 | return nullptr; |
michael@0 | 374 | |
michael@0 | 375 | uint32_t copied; |
michael@0 | 376 | UTF8ToUnicodeBuffer(aSource, buffer, &copied); |
michael@0 | 377 | NS_ASSERTION(length == copied, "length mismatch"); |
michael@0 | 378 | |
michael@0 | 379 | if (aUTF16Count) |
michael@0 | 380 | *aUTF16Count = copied; |
michael@0 | 381 | return buffer; |
michael@0 | 382 | } |
michael@0 | 383 | |
michael@0 | 384 | char16_t* |
michael@0 | 385 | CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength ) |
michael@0 | 386 | { |
michael@0 | 387 | nsAString::const_iterator fromBegin, fromEnd; |
michael@0 | 388 | char16_t* toBegin = aDest; |
michael@0 | 389 | copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin); |
michael@0 | 390 | return aDest; |
michael@0 | 391 | } |
michael@0 | 392 | |
michael@0 | 393 | void |
michael@0 | 394 | CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, |
michael@0 | 395 | const nsAString::const_iterator& aSrcEnd, |
michael@0 | 396 | nsAString& aDest ) |
michael@0 | 397 | { |
michael@0 | 398 | nsAString::iterator writer; |
michael@0 | 399 | aDest.SetLength(Distance(aSrcStart, aSrcEnd)); |
michael@0 | 400 | |
michael@0 | 401 | aDest.BeginWriting(writer); |
michael@0 | 402 | nsAString::const_iterator fromBegin(aSrcStart); |
michael@0 | 403 | |
michael@0 | 404 | copy_string(fromBegin, aSrcEnd, writer); |
michael@0 | 405 | } |
michael@0 | 406 | |
michael@0 | 407 | void |
michael@0 | 408 | AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, |
michael@0 | 409 | const nsAString::const_iterator& aSrcEnd, |
michael@0 | 410 | nsAString& aDest ) |
michael@0 | 411 | { |
michael@0 | 412 | nsAString::iterator writer; |
michael@0 | 413 | uint32_t oldLength = aDest.Length(); |
michael@0 | 414 | aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); |
michael@0 | 415 | |
michael@0 | 416 | aDest.BeginWriting(writer).advance(oldLength); |
michael@0 | 417 | nsAString::const_iterator fromBegin(aSrcStart); |
michael@0 | 418 | |
michael@0 | 419 | copy_string(fromBegin, aSrcEnd, writer); |
michael@0 | 420 | } |
michael@0 | 421 | |
michael@0 | 422 | bool |
michael@0 | 423 | IsASCII( const nsAString& aString ) |
michael@0 | 424 | { |
michael@0 | 425 | static const char16_t NOT_ASCII = char16_t(~0x007F); |
michael@0 | 426 | |
michael@0 | 427 | |
michael@0 | 428 | // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character |
michael@0 | 429 | |
michael@0 | 430 | nsAString::const_iterator iter, done_reading; |
michael@0 | 431 | aString.BeginReading(iter); |
michael@0 | 432 | aString.EndReading(done_reading); |
michael@0 | 433 | |
michael@0 | 434 | const char16_t* c = iter.get(); |
michael@0 | 435 | const char16_t* end = done_reading.get(); |
michael@0 | 436 | |
michael@0 | 437 | while ( c < end ) |
michael@0 | 438 | { |
michael@0 | 439 | if ( *c++ & NOT_ASCII ) |
michael@0 | 440 | return false; |
michael@0 | 441 | } |
michael@0 | 442 | |
michael@0 | 443 | return true; |
michael@0 | 444 | } |
michael@0 | 445 | |
michael@0 | 446 | bool |
michael@0 | 447 | IsASCII( const nsACString& aString ) |
michael@0 | 448 | { |
michael@0 | 449 | static const char NOT_ASCII = char(~0x7F); |
michael@0 | 450 | |
michael@0 | 451 | |
michael@0 | 452 | // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character |
michael@0 | 453 | |
michael@0 | 454 | nsACString::const_iterator iter, done_reading; |
michael@0 | 455 | aString.BeginReading(iter); |
michael@0 | 456 | aString.EndReading(done_reading); |
michael@0 | 457 | |
michael@0 | 458 | const char* c = iter.get(); |
michael@0 | 459 | const char* end = done_reading.get(); |
michael@0 | 460 | |
michael@0 | 461 | while ( c < end ) |
michael@0 | 462 | { |
michael@0 | 463 | if ( *c++ & NOT_ASCII ) |
michael@0 | 464 | return false; |
michael@0 | 465 | } |
michael@0 | 466 | |
michael@0 | 467 | return true; |
michael@0 | 468 | } |
michael@0 | 469 | |
michael@0 | 470 | bool |
michael@0 | 471 | IsUTF8( const nsACString& aString, bool aRejectNonChar ) |
michael@0 | 472 | { |
michael@0 | 473 | nsReadingIterator<char> done_reading; |
michael@0 | 474 | aString.EndReading(done_reading); |
michael@0 | 475 | |
michael@0 | 476 | int32_t state = 0; |
michael@0 | 477 | bool overlong = false; |
michael@0 | 478 | bool surrogate = false; |
michael@0 | 479 | bool nonchar = false; |
michael@0 | 480 | uint16_t olupper = 0; // overlong byte upper bound. |
michael@0 | 481 | uint16_t slower = 0; // surrogate byte lower bound. |
michael@0 | 482 | |
michael@0 | 483 | nsReadingIterator<char> iter; |
michael@0 | 484 | aString.BeginReading(iter); |
michael@0 | 485 | |
michael@0 | 486 | const char* ptr = iter.get(); |
michael@0 | 487 | const char* end = done_reading.get(); |
michael@0 | 488 | while ( ptr < end ) |
michael@0 | 489 | { |
michael@0 | 490 | uint8_t c; |
michael@0 | 491 | |
michael@0 | 492 | if (0 == state) |
michael@0 | 493 | { |
michael@0 | 494 | c = *ptr++; |
michael@0 | 495 | |
michael@0 | 496 | if ( UTF8traits::isASCII(c) ) |
michael@0 | 497 | continue; |
michael@0 | 498 | |
michael@0 | 499 | if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong. |
michael@0 | 500 | return false; |
michael@0 | 501 | else if ( UTF8traits::is2byte(c) ) |
michael@0 | 502 | state = 1; |
michael@0 | 503 | else if ( UTF8traits::is3byte(c) ) |
michael@0 | 504 | { |
michael@0 | 505 | state = 2; |
michael@0 | 506 | if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF] |
michael@0 | 507 | { |
michael@0 | 508 | overlong = true; |
michael@0 | 509 | olupper = 0x9F; |
michael@0 | 510 | } |
michael@0 | 511 | else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint |
michael@0 | 512 | { |
michael@0 | 513 | surrogate = true; |
michael@0 | 514 | slower = 0xA0; |
michael@0 | 515 | } |
michael@0 | 516 | else if ( c == 0xEF ) // EF BF [BE-BF] : non-character |
michael@0 | 517 | nonchar = true; |
michael@0 | 518 | } |
michael@0 | 519 | else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090) |
michael@0 | 520 | { |
michael@0 | 521 | state = 3; |
michael@0 | 522 | nonchar = true; |
michael@0 | 523 | if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2} |
michael@0 | 524 | { |
michael@0 | 525 | overlong = true; |
michael@0 | 526 | olupper = 0x8F; |
michael@0 | 527 | } |
michael@0 | 528 | else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF] |
michael@0 | 529 | { |
michael@0 | 530 | // actually not surrogates but codepoints beyond 0x10FFFF |
michael@0 | 531 | surrogate = true; |
michael@0 | 532 | slower = 0x90; |
michael@0 | 533 | } |
michael@0 | 534 | } |
michael@0 | 535 | else |
michael@0 | 536 | return false; // Not UTF-8 string |
michael@0 | 537 | } |
michael@0 | 538 | |
michael@0 | 539 | if (nonchar && !aRejectNonChar) |
michael@0 | 540 | nonchar = false; |
michael@0 | 541 | |
michael@0 | 542 | while ( ptr < end && state ) |
michael@0 | 543 | { |
michael@0 | 544 | c = *ptr++; |
michael@0 | 545 | --state; |
michael@0 | 546 | |
michael@0 | 547 | // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF] |
michael@0 | 548 | if ( nonchar && |
michael@0 | 549 | ( ( !state && c < 0xBE ) || |
michael@0 | 550 | ( state == 1 && c != 0xBF ) || |
michael@0 | 551 | ( state == 2 && 0x0F != (0x0F & c) ))) |
michael@0 | 552 | nonchar = false; |
michael@0 | 553 | |
michael@0 | 554 | if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) || |
michael@0 | 555 | ( surrogate && slower <= c ) || ( nonchar && !state )) |
michael@0 | 556 | return false; // Not UTF-8 string |
michael@0 | 557 | |
michael@0 | 558 | overlong = surrogate = false; |
michael@0 | 559 | } |
michael@0 | 560 | } |
michael@0 | 561 | return !state; // state != 0 at the end indicates an invalid UTF-8 seq. |
michael@0 | 562 | } |
michael@0 | 563 | |
/**
 * A character sink for in-place case conversion.
 *
 * |write| casts away the constness of the buffer it is given and rewrites
 * every 'a'..'z' in it as the corresponding 'A'..'Z'; all other characters
 * are left as they are.
 */
class ConvertToUpperCase
{
public:
  typedef char value_type;

  /**
   * Upper-cases |aSourceLength| characters starting at |aSource|, in place.
   * @return |aSourceLength|.
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* const chars = const_cast<char*>(aSource);
    for (uint32_t i = 0; i < aSourceLength; ++i) {
      const char ch = chars[i];
      if (ch >= 'a' && ch <= 'z') {
        chars[i] = ch - ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
michael@0 | 586 | |
michael@0 | 587 | void |
michael@0 | 588 | ToUpperCase( nsCSubstring& aCString ) |
michael@0 | 589 | { |
michael@0 | 590 | ConvertToUpperCase converter; |
michael@0 | 591 | char* start; |
michael@0 | 592 | converter.write(aCString.BeginWriting(start), aCString.Length()); |
michael@0 | 593 | } |
michael@0 | 594 | |
michael@0 | 595 | /** |
michael@0 | 596 | * A character sink for copying with case conversion. |
michael@0 | 597 | */ |
michael@0 | 598 | class CopyToUpperCase |
michael@0 | 599 | { |
michael@0 | 600 | public: |
michael@0 | 601 | typedef char value_type; |
michael@0 | 602 | |
michael@0 | 603 | CopyToUpperCase( nsACString::iterator& aDestIter ) |
michael@0 | 604 | : mIter(aDestIter) |
michael@0 | 605 | { |
michael@0 | 606 | } |
michael@0 | 607 | |
michael@0 | 608 | uint32_t |
michael@0 | 609 | write( const char* aSource, uint32_t aSourceLength ) |
michael@0 | 610 | { |
michael@0 | 611 | uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); |
michael@0 | 612 | char* cp = mIter.get(); |
michael@0 | 613 | const char* end = aSource + len; |
michael@0 | 614 | while (aSource != end) { |
michael@0 | 615 | char ch = *aSource; |
michael@0 | 616 | if ((ch >= 'a') && (ch <= 'z')) |
michael@0 | 617 | *cp = ch - ('a' - 'A'); |
michael@0 | 618 | else |
michael@0 | 619 | *cp = ch; |
michael@0 | 620 | ++aSource; |
michael@0 | 621 | ++cp; |
michael@0 | 622 | } |
michael@0 | 623 | mIter.advance(len); |
michael@0 | 624 | return len; |
michael@0 | 625 | } |
michael@0 | 626 | |
michael@0 | 627 | protected: |
michael@0 | 628 | nsACString::iterator& mIter; |
michael@0 | 629 | }; |
michael@0 | 630 | |
michael@0 | 631 | void |
michael@0 | 632 | ToUpperCase( const nsACString& aSource, nsACString& aDest ) |
michael@0 | 633 | { |
michael@0 | 634 | nsACString::const_iterator fromBegin, fromEnd; |
michael@0 | 635 | nsACString::iterator toBegin; |
michael@0 | 636 | aDest.SetLength(aSource.Length()); |
michael@0 | 637 | |
michael@0 | 638 | CopyToUpperCase converter(aDest.BeginWriting(toBegin)); |
michael@0 | 639 | copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
michael@0 | 640 | } |
michael@0 | 641 | |
/**
 * A character sink for case conversion.
 *
 * |write| casts away the constness of the buffer it is given and rewrites
 * every 'A'..'Z' in it as the corresponding 'a'..'z'; all other characters
 * are left as they are.
 */
class ConvertToLowerCase
{
public:
  typedef char value_type;

  /**
   * Lower-cases |aSourceLength| characters starting at |aSource|, in place.
   * @return |aSourceLength|.
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* const chars = const_cast<char*>(aSource);
    for (uint32_t i = 0; i < aSourceLength; ++i) {
      const char ch = chars[i];
      if (ch >= 'A' && ch <= 'Z') {
        chars[i] = ch + ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
michael@0 | 664 | |
michael@0 | 665 | void |
michael@0 | 666 | ToLowerCase( nsCSubstring& aCString ) |
michael@0 | 667 | { |
michael@0 | 668 | ConvertToLowerCase converter; |
michael@0 | 669 | char* start; |
michael@0 | 670 | converter.write(aCString.BeginWriting(start), aCString.Length()); |
michael@0 | 671 | } |
michael@0 | 672 | |
michael@0 | 673 | /** |
michael@0 | 674 | * A character sink for copying with case conversion. |
michael@0 | 675 | */ |
michael@0 | 676 | class CopyToLowerCase |
michael@0 | 677 | { |
michael@0 | 678 | public: |
michael@0 | 679 | typedef char value_type; |
michael@0 | 680 | |
michael@0 | 681 | CopyToLowerCase( nsACString::iterator& aDestIter ) |
michael@0 | 682 | : mIter(aDestIter) |
michael@0 | 683 | { |
michael@0 | 684 | } |
michael@0 | 685 | |
michael@0 | 686 | uint32_t |
michael@0 | 687 | write( const char* aSource, uint32_t aSourceLength ) |
michael@0 | 688 | { |
michael@0 | 689 | uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); |
michael@0 | 690 | char* cp = mIter.get(); |
michael@0 | 691 | const char* end = aSource + len; |
michael@0 | 692 | while (aSource != end) { |
michael@0 | 693 | char ch = *aSource; |
michael@0 | 694 | if ((ch >= 'A') && (ch <= 'Z')) |
michael@0 | 695 | *cp = ch + ('a' - 'A'); |
michael@0 | 696 | else |
michael@0 | 697 | *cp = ch; |
michael@0 | 698 | ++aSource; |
michael@0 | 699 | ++cp; |
michael@0 | 700 | } |
michael@0 | 701 | mIter.advance(len); |
michael@0 | 702 | return len; |
michael@0 | 703 | } |
michael@0 | 704 | |
michael@0 | 705 | protected: |
michael@0 | 706 | nsACString::iterator& mIter; |
michael@0 | 707 | }; |
michael@0 | 708 | |
michael@0 | 709 | void |
michael@0 | 710 | ToLowerCase( const nsACString& aSource, nsACString& aDest ) |
michael@0 | 711 | { |
michael@0 | 712 | nsACString::const_iterator fromBegin, fromEnd; |
michael@0 | 713 | nsACString::iterator toBegin; |
michael@0 | 714 | aDest.SetLength(aSource.Length()); |
michael@0 | 715 | |
michael@0 | 716 | CopyToLowerCase converter(aDest.BeginWriting(toBegin)); |
michael@0 | 717 | copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); |
michael@0 | 718 | } |
michael@0 | 719 | |
michael@0 | 720 | bool |
michael@0 | 721 | ParseString(const nsACString& aSource, char aDelimiter, |
michael@0 | 722 | nsTArray<nsCString>& aArray) |
michael@0 | 723 | { |
michael@0 | 724 | nsACString::const_iterator start, end; |
michael@0 | 725 | aSource.BeginReading(start); |
michael@0 | 726 | aSource.EndReading(end); |
michael@0 | 727 | |
michael@0 | 728 | uint32_t oldLength = aArray.Length(); |
michael@0 | 729 | |
michael@0 | 730 | for (;;) |
michael@0 | 731 | { |
michael@0 | 732 | nsACString::const_iterator delimiter = start; |
michael@0 | 733 | FindCharInReadable(aDelimiter, delimiter, end); |
michael@0 | 734 | |
michael@0 | 735 | if (delimiter != start) |
michael@0 | 736 | { |
michael@0 | 737 | if (!aArray.AppendElement(Substring(start, delimiter))) |
michael@0 | 738 | { |
michael@0 | 739 | aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength); |
michael@0 | 740 | return false; |
michael@0 | 741 | } |
michael@0 | 742 | } |
michael@0 | 743 | |
michael@0 | 744 | if (delimiter == end) |
michael@0 | 745 | break; |
michael@0 | 746 | start = ++delimiter; |
michael@0 | 747 | if (start == end) |
michael@0 | 748 | break; |
michael@0 | 749 | } |
michael@0 | 750 | |
michael@0 | 751 | return true; |
michael@0 | 752 | } |
michael@0 | 753 | |
/**
 * Searches [aSearchStart, aSearchEnd) from left to right for the first
 * occurrence of aPattern.
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search;
 *                     out: start of the match when found, otherwise equal
 *                     to aSearchEnd
 * @param aSearchEnd   in: end of the range to search;
 *                     out: end of the match when found, otherwise unchanged
 * @param compare      comparator invoked as compare(lhs, rhs, 1, 1) on
 *                     single characters; a non-zero result means "differs"
 * @return true when the pattern was found
 *
 * An empty pattern is reported as "not found" (with aSearchStart moved to
 * aSearchEnd).  That is the result the unguarded loops below would end up
 * with as well, but they would reach it by dereferencing the pattern
 * iterator at and beyond its end position, so the case is handled up front.
 */
template <class StringT, class IteratorT, class Comparator>
bool
FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
{
  // only bother searching at all if we're given a non-empty range to search
  if ( aSearchStart == aSearchEnd )
    return false;

  IteratorT patternStart, patternEnd;
  aPattern.BeginReading(patternStart);
  aPattern.EndReading(patternEnd);

  // an empty pattern has no first character to compare against; bail out
  // before anything below reads through patternStart
  if ( patternStart == patternEnd )
  {
    aSearchStart = aSearchEnd;
    return false;
  }

  bool found_it = false;

  // outer loop keeps searching till we find it or run out of string to search
  while ( !found_it )
  {
    // fast inner loop (that's what it's called, not what it is) looks for a potential match
    while ( aSearchStart != aSearchEnd &&
            compare(patternStart.get(), aSearchStart.get(), 1, 1) )
      ++aSearchStart;

    // if we broke out of the `fast' loop because we're out of string ... we're done: no match
    if ( aSearchStart == aSearchEnd )
      break;

    // otherwise, we're at a potential match, let's see if we really hit one
    IteratorT testPattern(patternStart);
    IteratorT testSearch(aSearchStart);

    // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
    for (;;)
    {
      // we already compared the first character in the outer loop,
      // so we'll advance before the next comparison
      ++testPattern;
      ++testSearch;

      // if we verified all the way to the end of the pattern, then we found it!
      if ( testPattern == patternEnd )
      {
        found_it = true;
        aSearchEnd = testSearch; // return the exact found range through the parameters
        break;
      }

      // if we got to end of the string we're searching before we hit the end of the
      // pattern, we'll never find what we're looking for
      if ( testSearch == aSearchEnd )
      {
        aSearchStart = aSearchEnd;
        break;
      }

      // else if we mismatched ... it's time to advance to the next search position
      // and get back into the `fast' loop
      if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
      {
        ++aSearchStart;
        break;
      }
    }
  }

  return found_it;
}
michael@0 | 820 | |
/**
 * This searches the entire string from right to left, and returns the first
 * match found, if any.
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search;
 *                     out: start of the match when found, otherwise equal
 *                     to aSearchEnd
 * @param aSearchEnd   in: end of the range to search;
 *                     out: end of the match when found, otherwise unchanged
 * @param compare      comparator invoked as compare(lhs, rhs, 1, 1) on
 *                     single characters; must return 0 when they are equal
 * @return true when the pattern was found
 *
 * An empty pattern is reported as "not found" (with aSearchStart moved to
 * aSearchEnd).  Without the explicit check, stepping patternEnd back to
 * "the last character" would move it in front of the pattern's start and
 * the comparisons would read outside the pattern.
 */
template <class StringT, class IteratorT, class Comparator>
bool
RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
{
  IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
  aPattern.BeginReading(patternStart);
  aPattern.EndReading(patternEnd);

  // An empty pattern has no last character to point at; report no match
  // before patternEnd is decremented below.
  if ( patternStart == patternEnd )
  {
    aSearchStart = aSearchEnd;
    return false;
  }

  // Point to the last character in the pattern
  --patternEnd;
  // outer loop keeps searching till we run out of string to search
  while ( aSearchStart != searchEnd )
  {
    // Point to the end position of the next possible match
    --searchEnd;

    // Check last character, if a match, explore further from here
    if ( compare(patternEnd.get(), searchEnd.get(), 1, 1) == 0 )
    {
      // We're at a potential match, let's see if we really hit one
      IteratorT testPattern(patternEnd);
      IteratorT testSearch(searchEnd);

      // inner loop verifies the potential match at the current position
      do
      {
        // if we verified all the way to the start of the pattern, then we found it!
        if ( testPattern == patternStart )
        {
          aSearchStart = testSearch;  // point to start of match
          aSearchEnd = ++searchEnd;   // point to end of match
          return true;
        }

        // if we got to the start of the string we're searching before we hit the
        // start of the pattern, we'll never find what we're looking for
        if ( testSearch == aSearchStart )
        {
          aSearchStart = aSearchEnd;
          return false;
        }

        // test previous character for a match
        --testPattern;
        --testSearch;
      }
      while ( compare(testPattern.get(), testSearch.get(), 1, 1) == 0 );
    }
  }

  aSearchStart = aSearchEnd;
  return false;
}
michael@0 | 877 | |
michael@0 | 878 | bool |
michael@0 | 879 | FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator ) |
michael@0 | 880 | { |
michael@0 | 881 | return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
michael@0 | 882 | } |
michael@0 | 883 | |
michael@0 | 884 | bool |
michael@0 | 885 | FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) |
michael@0 | 886 | { |
michael@0 | 887 | return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
michael@0 | 888 | } |
michael@0 | 889 | |
michael@0 | 890 | bool |
michael@0 | 891 | CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd ) |
michael@0 | 892 | { |
michael@0 | 893 | return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator()); |
michael@0 | 894 | } |
michael@0 | 895 | |
michael@0 | 896 | bool |
michael@0 | 897 | RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator) |
michael@0 | 898 | { |
michael@0 | 899 | return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
michael@0 | 900 | } |
michael@0 | 901 | |
michael@0 | 902 | bool |
michael@0 | 903 | RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) |
michael@0 | 904 | { |
michael@0 | 905 | return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); |
michael@0 | 906 | } |
michael@0 | 907 | |
michael@0 | 908 | bool |
michael@0 | 909 | FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ) |
michael@0 | 910 | { |
michael@0 | 911 | int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); |
michael@0 | 912 | |
michael@0 | 913 | const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); |
michael@0 | 914 | if ( charFoundAt ) { |
michael@0 | 915 | aSearchStart.advance( charFoundAt - aSearchStart.get() ); |
michael@0 | 916 | return true; |
michael@0 | 917 | } |
michael@0 | 918 | |
michael@0 | 919 | aSearchStart.advance(fragmentLength); |
michael@0 | 920 | return false; |
michael@0 | 921 | } |
michael@0 | 922 | |
michael@0 | 923 | bool |
michael@0 | 924 | FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ) |
michael@0 | 925 | { |
michael@0 | 926 | int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); |
michael@0 | 927 | |
michael@0 | 928 | const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); |
michael@0 | 929 | if ( charFoundAt ) { |
michael@0 | 930 | aSearchStart.advance( charFoundAt - aSearchStart.get() ); |
michael@0 | 931 | return true; |
michael@0 | 932 | } |
michael@0 | 933 | |
michael@0 | 934 | aSearchStart.advance(fragmentLength); |
michael@0 | 935 | return false; |
michael@0 | 936 | } |
michael@0 | 937 | |
michael@0 | 938 | uint32_t |
michael@0 | 939 | CountCharInReadable( const nsAString& aStr, |
michael@0 | 940 | char16_t aChar ) |
michael@0 | 941 | { |
michael@0 | 942 | uint32_t count = 0; |
michael@0 | 943 | nsAString::const_iterator begin, end; |
michael@0 | 944 | |
michael@0 | 945 | aStr.BeginReading(begin); |
michael@0 | 946 | aStr.EndReading(end); |
michael@0 | 947 | |
michael@0 | 948 | while (begin != end) { |
michael@0 | 949 | if (*begin == aChar) { |
michael@0 | 950 | ++count; |
michael@0 | 951 | } |
michael@0 | 952 | ++begin; |
michael@0 | 953 | } |
michael@0 | 954 | |
michael@0 | 955 | return count; |
michael@0 | 956 | } |
michael@0 | 957 | |
michael@0 | 958 | uint32_t |
michael@0 | 959 | CountCharInReadable( const nsACString& aStr, |
michael@0 | 960 | char aChar ) |
michael@0 | 961 | { |
michael@0 | 962 | uint32_t count = 0; |
michael@0 | 963 | nsACString::const_iterator begin, end; |
michael@0 | 964 | |
michael@0 | 965 | aStr.BeginReading(begin); |
michael@0 | 966 | aStr.EndReading(end); |
michael@0 | 967 | |
michael@0 | 968 | while (begin != end) { |
michael@0 | 969 | if (*begin == aChar) { |
michael@0 | 970 | ++count; |
michael@0 | 971 | } |
michael@0 | 972 | ++begin; |
michael@0 | 973 | } |
michael@0 | 974 | |
michael@0 | 975 | return count; |
michael@0 | 976 | } |
michael@0 | 977 | |
michael@0 | 978 | bool |
michael@0 | 979 | StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, |
michael@0 | 980 | const nsStringComparator& aComparator ) |
michael@0 | 981 | { |
michael@0 | 982 | nsAString::size_type src_len = aSource.Length(), |
michael@0 | 983 | sub_len = aSubstring.Length(); |
michael@0 | 984 | if (sub_len > src_len) |
michael@0 | 985 | return false; |
michael@0 | 986 | return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); |
michael@0 | 987 | } |
michael@0 | 988 | |
michael@0 | 989 | bool |
michael@0 | 990 | StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, |
michael@0 | 991 | const nsCStringComparator& aComparator ) |
michael@0 | 992 | { |
michael@0 | 993 | nsACString::size_type src_len = aSource.Length(), |
michael@0 | 994 | sub_len = aSubstring.Length(); |
michael@0 | 995 | if (sub_len > src_len) |
michael@0 | 996 | return false; |
michael@0 | 997 | return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); |
michael@0 | 998 | } |
michael@0 | 999 | |
michael@0 | 1000 | bool |
michael@0 | 1001 | StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, |
michael@0 | 1002 | const nsStringComparator& aComparator ) |
michael@0 | 1003 | { |
michael@0 | 1004 | nsAString::size_type src_len = aSource.Length(), |
michael@0 | 1005 | sub_len = aSubstring.Length(); |
michael@0 | 1006 | if (sub_len > src_len) |
michael@0 | 1007 | return false; |
michael@0 | 1008 | return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, |
michael@0 | 1009 | aComparator); |
michael@0 | 1010 | } |
michael@0 | 1011 | |
michael@0 | 1012 | bool |
michael@0 | 1013 | StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, |
michael@0 | 1014 | const nsCStringComparator& aComparator ) |
michael@0 | 1015 | { |
michael@0 | 1016 | nsACString::size_type src_len = aSource.Length(), |
michael@0 | 1017 | sub_len = aSubstring.Length(); |
michael@0 | 1018 | if (sub_len > src_len) |
michael@0 | 1019 | return false; |
michael@0 | 1020 | return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, |
michael@0 | 1021 | aComparator); |
michael@0 | 1022 | } |
michael@0 | 1023 | |
michael@0 | 1024 | |
michael@0 | 1025 | |
// A single zero code unit.  EmptyString() wraps it directly and
// EmptyCString() views the same storage as a char string.
static const char16_t empty_buffer[1] = { '\0' };
michael@0 | 1027 | |
/**
 * Returns a reference to a function-local static nsDependentString that
 * wraps empty_buffer (a lone zero code unit).  The object is constructed
 * the first time the function is called and every call returns the same
 * object.
 */
const nsAFlatString&
EmptyString()
{
  static const nsDependentString sEmpty(empty_buffer);

  return sEmpty;
}
michael@0 | 1035 | |
/**
 * Returns a reference to a function-local static nsDependentCString that
 * views empty_buffer as a char string.  Every call returns the same object.
 */
const nsAFlatCString&
EmptyCString()
{
  // empty_buffer holds one zero char16_t, so the first byte seen through
  // the char pointer is 0.
  static const nsDependentCString sEmpty((const char *)empty_buffer);

  return sEmpty;
}
michael@0 | 1043 | |
/**
 * Returns a reference to a function-local static, default-constructed
 * nsXPIDLString.  Every call returns the same object.
 * NOTE(review): presumably a default-constructed nsXPIDLString represents
 * the "null"/void string -- confirm against the nsXPIDLString definition.
 */
const nsAFlatString&
NullString()
{
  static const nsXPIDLString sNull;

  return sNull;
}
michael@0 | 1051 | |
/**
 * Returns a reference to a function-local static, default-constructed
 * nsXPIDLCString.  Every call returns the same object.
 * NOTE(review): presumably a default-constructed nsXPIDLCString represents
 * the "null"/void string -- confirm against the nsXPIDLCString definition.
 */
const nsAFlatCString&
NullCString()
{
  static const nsXPIDLCString sNull;

  return sNull;
}
michael@0 | 1059 | |
michael@0 | 1060 | int32_t |
michael@0 | 1061 | CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, |
michael@0 | 1062 | const nsASingleFragmentString& aUTF16String) |
michael@0 | 1063 | { |
michael@0 | 1064 | static const uint32_t NOT_ASCII = uint32_t(~0x7F); |
michael@0 | 1065 | |
michael@0 | 1066 | const char *u8, *u8end; |
michael@0 | 1067 | aUTF8String.BeginReading(u8); |
michael@0 | 1068 | aUTF8String.EndReading(u8end); |
michael@0 | 1069 | |
michael@0 | 1070 | const char16_t *u16, *u16end; |
michael@0 | 1071 | aUTF16String.BeginReading(u16); |
michael@0 | 1072 | aUTF16String.EndReading(u16end); |
michael@0 | 1073 | |
michael@0 | 1074 | while (u8 != u8end && u16 != u16end) |
michael@0 | 1075 | { |
michael@0 | 1076 | // Cast away the signedness of *u8 to prevent signextension when |
michael@0 | 1077 | // converting to uint32_t |
michael@0 | 1078 | uint32_t c8_32 = (uint8_t)*u8; |
michael@0 | 1079 | |
michael@0 | 1080 | if (c8_32 & NOT_ASCII) |
michael@0 | 1081 | { |
michael@0 | 1082 | bool err; |
michael@0 | 1083 | c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); |
michael@0 | 1084 | if (err) |
michael@0 | 1085 | return INT32_MIN; |
michael@0 | 1086 | |
michael@0 | 1087 | uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); |
michael@0 | 1088 | // The above UTF16CharEnumerator::NextChar() calls can |
michael@0 | 1089 | // fail, but if it does for anything other than no data to |
michael@0 | 1090 | // look at (which can't happen here), it returns the |
michael@0 | 1091 | // Unicode replacement character 0xFFFD for the invalid |
michael@0 | 1092 | // data they were fed. Ignore that error and treat invalid |
michael@0 | 1093 | // UTF16 as 0xFFFD. |
michael@0 | 1094 | // |
michael@0 | 1095 | // This matches what our UTF16 to UTF8 conversion code |
michael@0 | 1096 | // does, and thus a UTF8 string that came from an invalid |
michael@0 | 1097 | // UTF16 string will compare equal to the invalid UTF16 |
michael@0 | 1098 | // string it came from. Same is true for any other UTF16 |
michael@0 | 1099 | // string differs only in the invalid part of the string. |
michael@0 | 1100 | |
michael@0 | 1101 | if (c8_32 != c16_32) |
michael@0 | 1102 | return c8_32 < c16_32 ? -1 : 1; |
michael@0 | 1103 | } |
michael@0 | 1104 | else |
michael@0 | 1105 | { |
michael@0 | 1106 | if (c8_32 != *u16) |
michael@0 | 1107 | return c8_32 > *u16 ? 1 : -1; |
michael@0 | 1108 | |
michael@0 | 1109 | ++u8; |
michael@0 | 1110 | ++u16; |
michael@0 | 1111 | } |
michael@0 | 1112 | } |
michael@0 | 1113 | |
michael@0 | 1114 | if (u8 != u8end) |
michael@0 | 1115 | { |
michael@0 | 1116 | // We get to the end of the UTF16 string, but no to the end of |
michael@0 | 1117 | // the UTF8 string. The UTF8 string is longer than the UTF16 |
michael@0 | 1118 | // string |
michael@0 | 1119 | |
michael@0 | 1120 | return 1; |
michael@0 | 1121 | } |
michael@0 | 1122 | |
michael@0 | 1123 | if (u16 != u16end) |
michael@0 | 1124 | { |
michael@0 | 1125 | // We get to the end of the UTF8 string, but no to the end of |
michael@0 | 1126 | // the UTF16 string. The UTF16 string is longer than the UTF8 |
michael@0 | 1127 | // string |
michael@0 | 1128 | |
michael@0 | 1129 | return -1; |
michael@0 | 1130 | } |
michael@0 | 1131 | |
michael@0 | 1132 | // The two strings match. |
michael@0 | 1133 | |
michael@0 | 1134 | return 0; |
michael@0 | 1135 | } |
michael@0 | 1136 | |
michael@0 | 1137 | void |
michael@0 | 1138 | AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) |
michael@0 | 1139 | { |
michael@0 | 1140 | NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); |
michael@0 | 1141 | if (IS_IN_BMP(aSource)) |
michael@0 | 1142 | { |
michael@0 | 1143 | aDest.Append(char16_t(aSource)); |
michael@0 | 1144 | } |
michael@0 | 1145 | else |
michael@0 | 1146 | { |
michael@0 | 1147 | aDest.Append(H_SURROGATE(aSource)); |
michael@0 | 1148 | aDest.Append(L_SURROGATE(aSource)); |
michael@0 | 1149 | } |
michael@0 | 1150 | } |