michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsReadableUtils.h" michael@0: michael@0: #include "nsMemory.h" michael@0: #include "nsString.h" michael@0: #include "nsTArray.h" michael@0: #include "nsUTF8Utils.h" michael@0: michael@0: void michael@0: LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: LossyAppendUTF16toASCII(aSource, aDest); michael@0: } michael@0: michael@0: void michael@0: CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: AppendASCIItoUTF16(aSource, aDest); michael@0: } michael@0: michael@0: void michael@0: LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: if (aSource) { michael@0: LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: CopyASCIItoUTF16( const char* aSource, nsAString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: if (aSource) { michael@0: AppendASCIItoUTF16(nsDependentCString(aSource), aDest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: AppendUTF16toUTF8(aSource, aDest); michael@0: } michael@0: michael@0: void michael@0: CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: AppendUTF8toUTF16(aSource, aDest); michael@0: } michael@0: michael@0: void michael@0: CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: AppendUTF16toUTF8(aSource, aDest); michael@0: } michael@0: michael@0: void michael@0: CopyUTF8toUTF16( const char* aSource, nsAString& aDest ) michael@0: { michael@0: aDest.Truncate(); michael@0: AppendUTF8toUTF16(aSource, aDest); michael@0: } michael@0: michael@0: void michael@0: LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ) michael@0: { michael@0: uint32_t old_dest_length = aDest.Length(); michael@0: aDest.SetLength(old_dest_length + aSource.Length()); michael@0: michael@0: nsAString::const_iterator fromBegin, fromEnd; michael@0: michael@0: nsACString::iterator dest; michael@0: aDest.BeginWriting(dest); michael@0: michael@0: dest.advance(old_dest_length); michael@0: michael@0: // right now, this won't work on multi-fragment destinations michael@0: LossyConvertEncoding16to8 converter(dest.get()); michael@0: michael@0: copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); michael@0: } michael@0: michael@0: void michael@0: AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ) michael@0: { michael@0: if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) { michael@0: NS_ABORT_OOM(aDest.Length() + aSource.Length()); michael@0: } michael@0: } michael@0: michael@0: bool michael@0: AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, michael@0: const mozilla::fallible_t& ) michael@0: { michael@0: uint32_t old_dest_length = aDest.Length(); michael@0: if (!aDest.SetLength(old_dest_length + aSource.Length(), mozilla::fallible_t())) { michael@0: return false; michael@0: } michael@0: michael@0: nsACString::const_iterator fromBegin, fromEnd; michael@0: michael@0: nsAString::iterator dest; michael@0: aDest.BeginWriting(dest); michael@0: michael@0: dest.advance(old_dest_length); michael@0: michael@0: // right now, this won't work on multi-fragment destinations michael@0: LossyConvertEncoding8to16 converter(dest.get()); michael@0: michael@0: copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ) michael@0: { michael@0: if (aSource) { michael@0: LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AppendASCIItoUTF16( const char* aSource, nsAString& aDest ) michael@0: { michael@0: if (aSource) { michael@0: AppendASCIItoUTF16(nsDependentCString(aSource), aDest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ) michael@0: { michael@0: if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) { michael@0: NS_ABORT_OOM(aDest.Length() + aSource.Length()); michael@0: } michael@0: } michael@0: michael@0: bool michael@0: AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, michael@0: const mozilla::fallible_t& ) michael@0: { michael@0: nsAString::const_iterator source_start, source_end; michael@0: CalculateUTF8Size calculator; michael@0: copy_string(aSource.BeginReading(source_start), michael@0: aSource.EndReading(source_end), calculator); michael@0: michael@0: uint32_t count = calculator.Size(); michael@0: michael@0: if (count) michael@0: { michael@0: uint32_t old_dest_length = aDest.Length(); michael@0: michael@0: // Grow the buffer if we need to. michael@0: if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { michael@0: return false; michael@0: } michael@0: michael@0: // All ready? Time to convert michael@0: michael@0: ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length); michael@0: copy_string(aSource.BeginReading(source_start), michael@0: aSource.EndReading(source_end), converter); michael@0: michael@0: NS_ASSERTION(converter.Size() == count, michael@0: "Unexpected disparity between CalculateUTF8Size and " michael@0: "ConvertUTF16toUTF8"); michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ) michael@0: { michael@0: if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) { michael@0: NS_ABORT_OOM(aDest.Length() + aSource.Length()); michael@0: } michael@0: } michael@0: michael@0: bool michael@0: AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, michael@0: const mozilla::fallible_t& ) michael@0: { michael@0: nsACString::const_iterator source_start, source_end; michael@0: CalculateUTF8Length calculator; michael@0: copy_string(aSource.BeginReading(source_start), michael@0: aSource.EndReading(source_end), calculator); michael@0: michael@0: uint32_t count = calculator.Length(); michael@0: michael@0: // Avoid making the string mutable if we're appending an empty string michael@0: if (count) michael@0: { michael@0: uint32_t old_dest_length = aDest.Length(); michael@0: michael@0: // Grow the buffer if we need to. michael@0: if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) { michael@0: return false; michael@0: } michael@0: michael@0: // All ready? Time to convert michael@0: michael@0: ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); michael@0: copy_string(aSource.BeginReading(source_start), michael@0: aSource.EndReading(source_end), converter); michael@0: michael@0: NS_ASSERTION(converter.ErrorEncountered() || michael@0: converter.Length() == count, michael@0: "CalculateUTF8Length produced the wrong length"); michael@0: michael@0: if (converter.ErrorEncountered()) michael@0: { michael@0: NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); michael@0: aDest.SetLength(old_dest_length); michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ) michael@0: { michael@0: if (aSource) { michael@0: AppendUTF16toUTF8(nsDependentString(aSource), aDest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AppendUTF8toUTF16( const char* aSource, nsAString& aDest ) michael@0: { michael@0: if (aSource) { michael@0: AppendUTF8toUTF16(nsDependentCString(aSource), aDest); michael@0: } michael@0: } michael@0: michael@0: michael@0: /** michael@0: * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). michael@0: * michael@0: * @param aSource an string you will eventually be making a copy of michael@0: * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|. michael@0: * michael@0: */ michael@0: template michael@0: inline michael@0: ToCharT* michael@0: AllocateStringCopy( const FromStringT& aSource, ToCharT* ) michael@0: { michael@0: return static_cast(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT))); michael@0: } michael@0: michael@0: michael@0: char* michael@0: ToNewCString( const nsAString& aSource ) michael@0: { michael@0: char* result = AllocateStringCopy(aSource, (char*)0); michael@0: if (!result) michael@0: return nullptr; michael@0: michael@0: nsAString::const_iterator fromBegin, fromEnd; michael@0: LossyConvertEncoding16to8 converter(result); michael@0: copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); michael@0: return result; michael@0: } michael@0: michael@0: char* michael@0: ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count ) michael@0: { michael@0: nsAString::const_iterator start, end; michael@0: CalculateUTF8Size calculator; michael@0: copy_string(aSource.BeginReading(start), aSource.EndReading(end), michael@0: calculator); michael@0: michael@0: if (aUTF8Count) michael@0: *aUTF8Count = calculator.Size(); michael@0: michael@0: char *result = static_cast michael@0: (nsMemory::Alloc(calculator.Size() + 1)); michael@0: if (!result) michael@0: return nullptr; michael@0: michael@0: ConvertUTF16toUTF8 converter(result); michael@0: copy_string(aSource.BeginReading(start), aSource.EndReading(end), michael@0: converter).write_terminator(); michael@0: NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); michael@0: michael@0: return result; michael@0: } michael@0: michael@0: char* michael@0: ToNewCString( const nsACString& aSource ) michael@0: { michael@0: // no conversion needed, just allocate a buffer of the correct length and copy into it michael@0: michael@0: char* result = AllocateStringCopy(aSource, (char*)0); michael@0: if (!result) michael@0: return nullptr; michael@0: michael@0: nsACString::const_iterator fromBegin, fromEnd; michael@0: char* toBegin = result; michael@0: *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0); michael@0: return result; michael@0: } michael@0: michael@0: char16_t* michael@0: ToNewUnicode( const nsAString& aSource ) michael@0: { michael@0: // no conversion needed, just allocate a buffer of the correct length and copy into it michael@0: michael@0: char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); michael@0: if (!result) michael@0: return nullptr; michael@0: michael@0: nsAString::const_iterator fromBegin, fromEnd; michael@0: char16_t* toBegin = result; michael@0: *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char16_t(0); michael@0: return result; michael@0: } michael@0: michael@0: char16_t* michael@0: ToNewUnicode( const nsACString& aSource ) michael@0: { michael@0: char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); michael@0: if (!result) michael@0: return nullptr; michael@0: michael@0: nsACString::const_iterator fromBegin, fromEnd; michael@0: LossyConvertEncoding8to16 converter(result); michael@0: copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator(); michael@0: return result; michael@0: } michael@0: michael@0: uint32_t michael@0: CalcUTF8ToUnicodeLength( const nsACString& aSource) michael@0: { michael@0: nsACString::const_iterator start, end; michael@0: CalculateUTF8Length calculator; michael@0: copy_string(aSource.BeginReading(start), aSource.EndReading(end), michael@0: calculator); michael@0: return calculator.Length(); michael@0: } michael@0: michael@0: char16_t* michael@0: UTF8ToUnicodeBuffer( const nsACString& aSource, char16_t* aBuffer, uint32_t *aUTF16Count ) michael@0: { michael@0: nsACString::const_iterator start, end; michael@0: ConvertUTF8toUTF16 converter(aBuffer); michael@0: copy_string(aSource.BeginReading(start), michael@0: aSource.EndReading(end), michael@0: converter).write_terminator(); michael@0: if (aUTF16Count) michael@0: *aUTF16Count = converter.Length(); michael@0: return aBuffer; michael@0: } michael@0: michael@0: char16_t* michael@0: UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count ) michael@0: { michael@0: const uint32_t length = CalcUTF8ToUnicodeLength(aSource); michael@0: const size_t buffer_size = (length + 1) * sizeof(char16_t); michael@0: char16_t *buffer = static_cast(nsMemory::Alloc(buffer_size)); michael@0: if (!buffer) michael@0: return nullptr; michael@0: michael@0: uint32_t copied; michael@0: UTF8ToUnicodeBuffer(aSource, buffer, &copied); michael@0: NS_ASSERTION(length == copied, "length mismatch"); michael@0: michael@0: if (aUTF16Count) michael@0: *aUTF16Count = copied; michael@0: return buffer; michael@0: } michael@0: michael@0: char16_t* michael@0: CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength ) michael@0: { michael@0: nsAString::const_iterator fromBegin, fromEnd; michael@0: char16_t* toBegin = aDest; michael@0: copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin); michael@0: return aDest; michael@0: } michael@0: michael@0: void michael@0: CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, michael@0: const nsAString::const_iterator& aSrcEnd, michael@0: nsAString& aDest ) michael@0: { michael@0: nsAString::iterator writer; michael@0: aDest.SetLength(Distance(aSrcStart, aSrcEnd)); michael@0: michael@0: aDest.BeginWriting(writer); michael@0: nsAString::const_iterator fromBegin(aSrcStart); michael@0: michael@0: copy_string(fromBegin, aSrcEnd, writer); michael@0: } michael@0: michael@0: void michael@0: AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, michael@0: const nsAString::const_iterator& aSrcEnd, michael@0: nsAString& aDest ) michael@0: { michael@0: nsAString::iterator writer; michael@0: uint32_t oldLength = aDest.Length(); michael@0: aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); michael@0: michael@0: aDest.BeginWriting(writer).advance(oldLength); michael@0: nsAString::const_iterator fromBegin(aSrcStart); michael@0: michael@0: copy_string(fromBegin, aSrcEnd, writer); michael@0: } michael@0: michael@0: bool michael@0: IsASCII( const nsAString& aString ) michael@0: { michael@0: static const char16_t NOT_ASCII = char16_t(~0x007F); michael@0: michael@0: michael@0: // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character michael@0: michael@0: nsAString::const_iterator iter, done_reading; michael@0: aString.BeginReading(iter); michael@0: aString.EndReading(done_reading); michael@0: michael@0: const char16_t* c = iter.get(); michael@0: const char16_t* end = done_reading.get(); michael@0: michael@0: while ( c < end ) michael@0: { michael@0: if ( *c++ & NOT_ASCII ) michael@0: return false; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool michael@0: IsASCII( const nsACString& aString ) michael@0: { michael@0: static const char NOT_ASCII = char(~0x7F); michael@0: michael@0: michael@0: // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character michael@0: michael@0: nsACString::const_iterator iter, done_reading; michael@0: aString.BeginReading(iter); michael@0: aString.EndReading(done_reading); michael@0: michael@0: const char* c = iter.get(); michael@0: const char* end = done_reading.get(); michael@0: michael@0: while ( c < end ) michael@0: { michael@0: if ( *c++ & NOT_ASCII ) michael@0: return false; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool michael@0: IsUTF8( const nsACString& aString, bool aRejectNonChar ) michael@0: { michael@0: nsReadingIterator done_reading; michael@0: aString.EndReading(done_reading); michael@0: michael@0: int32_t state = 0; michael@0: bool overlong = false; michael@0: bool surrogate = false; michael@0: bool nonchar = false; michael@0: uint16_t olupper = 0; // overlong byte upper bound. michael@0: uint16_t slower = 0; // surrogate byte lower bound. michael@0: michael@0: nsReadingIterator iter; michael@0: aString.BeginReading(iter); michael@0: michael@0: const char* ptr = iter.get(); michael@0: const char* end = done_reading.get(); michael@0: while ( ptr < end ) michael@0: { michael@0: uint8_t c; michael@0: michael@0: if (0 == state) michael@0: { michael@0: c = *ptr++; michael@0: michael@0: if ( UTF8traits::isASCII(c) ) michael@0: continue; michael@0: michael@0: if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong. michael@0: return false; michael@0: else if ( UTF8traits::is2byte(c) ) michael@0: state = 1; michael@0: else if ( UTF8traits::is3byte(c) ) michael@0: { michael@0: state = 2; michael@0: if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF] michael@0: { michael@0: overlong = true; michael@0: olupper = 0x9F; michael@0: } michael@0: else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint michael@0: { michael@0: surrogate = true; michael@0: slower = 0xA0; michael@0: } michael@0: else if ( c == 0xEF ) // EF BF [BE-BF] : non-character michael@0: nonchar = true; michael@0: } michael@0: else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090) michael@0: { michael@0: state = 3; michael@0: nonchar = true; michael@0: if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2} michael@0: { michael@0: overlong = true; michael@0: olupper = 0x8F; michael@0: } michael@0: else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF] michael@0: { michael@0: // actually not surrogates but codepoints beyond 0x10FFFF michael@0: surrogate = true; michael@0: slower = 0x90; michael@0: } michael@0: } michael@0: else michael@0: return false; // Not UTF-8 string michael@0: } michael@0: michael@0: if (nonchar && !aRejectNonChar) michael@0: nonchar = false; michael@0: michael@0: while ( ptr < end && state ) michael@0: { michael@0: c = *ptr++; michael@0: --state; michael@0: michael@0: // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF] michael@0: if ( nonchar && michael@0: ( ( !state && c < 0xBE ) || michael@0: ( state == 1 && c != 0xBF ) || michael@0: ( state == 2 && 0x0F != (0x0F & c) ))) michael@0: nonchar = false; michael@0: michael@0: if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) || michael@0: ( surrogate && slower <= c ) || ( nonchar && !state )) michael@0: return false; // Not UTF-8 string michael@0: michael@0: overlong = surrogate = false; michael@0: } michael@0: } michael@0: return !state; // state != 0 at the end indicates an invalid UTF-8 seq. michael@0: } michael@0: michael@0: /** michael@0: * A character sink for in-place case conversion. michael@0: */ michael@0: class ConvertToUpperCase michael@0: { michael@0: public: michael@0: typedef char value_type; michael@0: michael@0: uint32_t michael@0: write( const char* aSource, uint32_t aSourceLength ) michael@0: { michael@0: char* cp = const_cast(aSource); michael@0: const char* end = aSource + aSourceLength; michael@0: while (cp != end) { michael@0: char ch = *cp; michael@0: if ((ch >= 'a') && (ch <= 'z')) michael@0: *cp = ch - ('a' - 'A'); michael@0: ++cp; michael@0: } michael@0: return aSourceLength; michael@0: } michael@0: }; michael@0: michael@0: void michael@0: ToUpperCase( nsCSubstring& aCString ) michael@0: { michael@0: ConvertToUpperCase converter; michael@0: char* start; michael@0: converter.write(aCString.BeginWriting(start), aCString.Length()); michael@0: } michael@0: michael@0: /** michael@0: * A character sink for copying with case conversion. michael@0: */ michael@0: class CopyToUpperCase michael@0: { michael@0: public: michael@0: typedef char value_type; michael@0: michael@0: CopyToUpperCase( nsACString::iterator& aDestIter ) michael@0: : mIter(aDestIter) michael@0: { michael@0: } michael@0: michael@0: uint32_t michael@0: write( const char* aSource, uint32_t aSourceLength ) michael@0: { michael@0: uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); michael@0: char* cp = mIter.get(); michael@0: const char* end = aSource + len; michael@0: while (aSource != end) { michael@0: char ch = *aSource; michael@0: if ((ch >= 'a') && (ch <= 'z')) michael@0: *cp = ch - ('a' - 'A'); michael@0: else michael@0: *cp = ch; michael@0: ++aSource; michael@0: ++cp; michael@0: } michael@0: mIter.advance(len); michael@0: return len; michael@0: } michael@0: michael@0: protected: michael@0: nsACString::iterator& mIter; michael@0: }; michael@0: michael@0: void michael@0: ToUpperCase( const nsACString& aSource, nsACString& aDest ) michael@0: { michael@0: nsACString::const_iterator fromBegin, fromEnd; michael@0: nsACString::iterator toBegin; michael@0: aDest.SetLength(aSource.Length()); michael@0: michael@0: CopyToUpperCase converter(aDest.BeginWriting(toBegin)); michael@0: copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); michael@0: } michael@0: michael@0: /** michael@0: * A character sink for case conversion. michael@0: */ michael@0: class ConvertToLowerCase michael@0: { michael@0: public: michael@0: typedef char value_type; michael@0: michael@0: uint32_t michael@0: write( const char* aSource, uint32_t aSourceLength ) michael@0: { michael@0: char* cp = const_cast(aSource); michael@0: const char* end = aSource + aSourceLength; michael@0: while (cp != end) { michael@0: char ch = *cp; michael@0: if ((ch >= 'A') && (ch <= 'Z')) michael@0: *cp = ch + ('a' - 'A'); michael@0: ++cp; michael@0: } michael@0: return aSourceLength; michael@0: } michael@0: }; michael@0: michael@0: void michael@0: ToLowerCase( nsCSubstring& aCString ) michael@0: { michael@0: ConvertToLowerCase converter; michael@0: char* start; michael@0: converter.write(aCString.BeginWriting(start), aCString.Length()); michael@0: } michael@0: michael@0: /** michael@0: * A character sink for copying with case conversion. michael@0: */ michael@0: class CopyToLowerCase michael@0: { michael@0: public: michael@0: typedef char value_type; michael@0: michael@0: CopyToLowerCase( nsACString::iterator& aDestIter ) michael@0: : mIter(aDestIter) michael@0: { michael@0: } michael@0: michael@0: uint32_t michael@0: write( const char* aSource, uint32_t aSourceLength ) michael@0: { michael@0: uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength); michael@0: char* cp = mIter.get(); michael@0: const char* end = aSource + len; michael@0: while (aSource != end) { michael@0: char ch = *aSource; michael@0: if ((ch >= 'A') && (ch <= 'Z')) michael@0: *cp = ch + ('a' - 'A'); michael@0: else michael@0: *cp = ch; michael@0: ++aSource; michael@0: ++cp; michael@0: } michael@0: mIter.advance(len); michael@0: return len; michael@0: } michael@0: michael@0: protected: michael@0: nsACString::iterator& mIter; michael@0: }; michael@0: michael@0: void michael@0: ToLowerCase( const nsACString& aSource, nsACString& aDest ) michael@0: { michael@0: nsACString::const_iterator fromBegin, fromEnd; michael@0: nsACString::iterator toBegin; michael@0: aDest.SetLength(aSource.Length()); michael@0: michael@0: CopyToLowerCase converter(aDest.BeginWriting(toBegin)); michael@0: copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter); michael@0: } michael@0: michael@0: bool michael@0: ParseString(const nsACString& aSource, char aDelimiter, michael@0: nsTArray& aArray) michael@0: { michael@0: nsACString::const_iterator start, end; michael@0: aSource.BeginReading(start); michael@0: aSource.EndReading(end); michael@0: michael@0: uint32_t oldLength = aArray.Length(); michael@0: michael@0: for (;;) michael@0: { michael@0: nsACString::const_iterator delimiter = start; michael@0: FindCharInReadable(aDelimiter, delimiter, end); michael@0: michael@0: if (delimiter != start) michael@0: { michael@0: if (!aArray.AppendElement(Substring(start, delimiter))) michael@0: { michael@0: aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength); michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: if (delimiter == end) michael@0: break; michael@0: start = ++delimiter; michael@0: if (start == end) michael@0: break; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: template michael@0: bool michael@0: FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare ) michael@0: { michael@0: bool found_it = false; michael@0: michael@0: // only bother searching at all if we're given a non-empty range to search michael@0: if ( aSearchStart != aSearchEnd ) michael@0: { michael@0: IteratorT aPatternStart, aPatternEnd; michael@0: aPattern.BeginReading(aPatternStart); michael@0: aPattern.EndReading(aPatternEnd); michael@0: michael@0: // outer loop keeps searching till we find it or run out of string to search michael@0: while ( !found_it ) michael@0: { michael@0: // fast inner loop (that's what it's called, not what it is) looks for a potential match michael@0: while ( aSearchStart != aSearchEnd && michael@0: compare(aPatternStart.get(), aSearchStart.get(), 1, 1) ) michael@0: ++aSearchStart; michael@0: michael@0: // if we broke out of the `fast' loop because we're out of string ... we're done: no match michael@0: if ( aSearchStart == aSearchEnd ) michael@0: break; michael@0: michael@0: // otherwise, we're at a potential match, let's see if we really hit one michael@0: IteratorT testPattern(aPatternStart); michael@0: IteratorT testSearch(aSearchStart); michael@0: michael@0: // slow inner loop verifies the potential match (found by the `fast' loop) at the current position michael@0: for(;;) michael@0: { michael@0: // we already compared the first character in the outer loop, michael@0: // so we'll advance before the next comparison michael@0: ++testPattern; michael@0: ++testSearch; michael@0: michael@0: // if we verified all the way to the end of the pattern, then we found it! michael@0: if ( testPattern == aPatternEnd ) michael@0: { michael@0: found_it = true; michael@0: aSearchEnd = testSearch; // return the exact found range through the parameters michael@0: break; michael@0: } michael@0: michael@0: // if we got to end of the string we're searching before we hit the end of the michael@0: // pattern, we'll never find what we're looking for michael@0: if ( testSearch == aSearchEnd ) michael@0: { michael@0: aSearchStart = aSearchEnd; michael@0: break; michael@0: } michael@0: michael@0: // else if we mismatched ... it's time to advance to the next search position michael@0: // and get back into the `fast' loop michael@0: if ( compare(testPattern.get(), testSearch.get(), 1, 1) ) michael@0: { michael@0: ++aSearchStart; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: return found_it; michael@0: } michael@0: michael@0: /** michael@0: * This searches the entire string from right to left, and returns the first match found, if any. michael@0: */ michael@0: template michael@0: bool michael@0: RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare ) michael@0: { michael@0: IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; michael@0: aPattern.BeginReading(patternStart); michael@0: aPattern.EndReading(patternEnd); michael@0: michael@0: // Point to the last character in the pattern michael@0: --patternEnd; michael@0: // outer loop keeps searching till we run out of string to search michael@0: while ( aSearchStart != searchEnd ) michael@0: { michael@0: // Point to the end position of the next possible match michael@0: --searchEnd; michael@0: michael@0: // Check last character, if a match, explore further from here michael@0: if ( compare(patternEnd.get(), searchEnd.get(), 1, 1) == 0 ) michael@0: { michael@0: // We're at a potential match, let's see if we really hit one michael@0: IteratorT testPattern(patternEnd); michael@0: IteratorT testSearch(searchEnd); michael@0: michael@0: // inner loop verifies the potential match at the current position michael@0: do michael@0: { michael@0: // if we verified all the way to the end of the pattern, then we found it! michael@0: if ( testPattern == patternStart ) michael@0: { michael@0: aSearchStart = testSearch; // point to start of match michael@0: aSearchEnd = ++searchEnd; // point to end of match michael@0: return true; michael@0: } michael@0: michael@0: // if we got to end of the string we're searching before we hit the end of the michael@0: // pattern, we'll never find what we're looking for michael@0: if ( testSearch == aSearchStart ) michael@0: { michael@0: aSearchStart = aSearchEnd; michael@0: return false; michael@0: } michael@0: michael@0: // test previous character for a match michael@0: --testPattern; michael@0: --testSearch; michael@0: } michael@0: while ( compare(testPattern.get(), testSearch.get(), 1, 1) == 0 ); michael@0: } michael@0: } michael@0: michael@0: aSearchStart = aSearchEnd; michael@0: return false; michael@0: } michael@0: michael@0: bool michael@0: FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator ) michael@0: { michael@0: return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); michael@0: } michael@0: michael@0: bool michael@0: FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) michael@0: { michael@0: return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); michael@0: } michael@0: michael@0: bool michael@0: CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd ) michael@0: { michael@0: return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator()); michael@0: } michael@0: michael@0: bool michael@0: RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator) michael@0: { michael@0: return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); michael@0: } michael@0: michael@0: bool michael@0: RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator) michael@0: { michael@0: return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); michael@0: } michael@0: michael@0: bool michael@0: FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ) michael@0: { michael@0: int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); michael@0: michael@0: const char16_t* charFoundAt = nsCharTraits::find(aSearchStart.get(), fragmentLength, aChar); michael@0: if ( charFoundAt ) { michael@0: aSearchStart.advance( charFoundAt - aSearchStart.get() ); michael@0: return true; michael@0: } michael@0: michael@0: aSearchStart.advance(fragmentLength); michael@0: return false; michael@0: } michael@0: michael@0: bool michael@0: FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ) michael@0: { michael@0: int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); michael@0: michael@0: const char* charFoundAt = nsCharTraits::find(aSearchStart.get(), fragmentLength, aChar); michael@0: if ( charFoundAt ) { michael@0: aSearchStart.advance( charFoundAt - aSearchStart.get() ); michael@0: return true; michael@0: } michael@0: michael@0: aSearchStart.advance(fragmentLength); michael@0: return false; michael@0: } michael@0: michael@0: uint32_t michael@0: CountCharInReadable( const nsAString& aStr, michael@0: char16_t aChar ) michael@0: { michael@0: uint32_t count = 0; michael@0: nsAString::const_iterator begin, end; michael@0: michael@0: aStr.BeginReading(begin); michael@0: aStr.EndReading(end); michael@0: michael@0: while (begin != end) { michael@0: if (*begin == aChar) { michael@0: ++count; michael@0: } michael@0: ++begin; michael@0: } michael@0: michael@0: return count; michael@0: } michael@0: michael@0: uint32_t michael@0: CountCharInReadable( const nsACString& aStr, michael@0: char aChar ) michael@0: { michael@0: uint32_t count = 0; michael@0: nsACString::const_iterator begin, end; michael@0: michael@0: aStr.BeginReading(begin); michael@0: aStr.EndReading(end); michael@0: michael@0: while (begin != end) { michael@0: if (*begin == aChar) { michael@0: ++count; michael@0: } michael@0: ++begin; michael@0: } michael@0: michael@0: return count; michael@0: } michael@0: michael@0: bool michael@0: StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, michael@0: const nsStringComparator& aComparator ) michael@0: { michael@0: nsAString::size_type src_len = aSource.Length(), michael@0: sub_len = aSubstring.Length(); michael@0: if (sub_len > src_len) michael@0: return false; michael@0: return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); michael@0: } michael@0: michael@0: bool michael@0: StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, michael@0: const nsCStringComparator& aComparator ) michael@0: { michael@0: nsACString::size_type src_len = aSource.Length(), michael@0: sub_len = aSubstring.Length(); michael@0: if (sub_len > src_len) michael@0: return false; michael@0: return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); michael@0: } michael@0: michael@0: bool michael@0: StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, michael@0: const nsStringComparator& aComparator ) michael@0: { michael@0: nsAString::size_type src_len = aSource.Length(), michael@0: sub_len = aSubstring.Length(); michael@0: if (sub_len > src_len) michael@0: return false; michael@0: return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, michael@0: aComparator); michael@0: } michael@0: michael@0: bool michael@0: StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, michael@0: const nsCStringComparator& aComparator ) michael@0: { michael@0: nsACString::size_type src_len = aSource.Length(), michael@0: sub_len = aSubstring.Length(); michael@0: if (sub_len > src_len) michael@0: return false; michael@0: return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, michael@0: aComparator); michael@0: } michael@0: michael@0: michael@0: michael@0: static const char16_t empty_buffer[1] = { '\0' }; michael@0: michael@0: const nsAFlatString& michael@0: EmptyString() michael@0: { michael@0: static const nsDependentString sEmpty(empty_buffer); michael@0: michael@0: return sEmpty; michael@0: } michael@0: michael@0: const nsAFlatCString& michael@0: EmptyCString() michael@0: { michael@0: static const nsDependentCString sEmpty((const char *)empty_buffer); michael@0: michael@0: return sEmpty; michael@0: } michael@0: michael@0: const nsAFlatString& michael@0: NullString() michael@0: { michael@0: static const nsXPIDLString sNull; michael@0: michael@0: return sNull; michael@0: } michael@0: michael@0: const nsAFlatCString& michael@0: NullCString() michael@0: { michael@0: static const nsXPIDLCString sNull; michael@0: michael@0: return sNull; michael@0: } michael@0: michael@0: int32_t michael@0: CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, michael@0: const nsASingleFragmentString& aUTF16String) michael@0: { michael@0: static const uint32_t NOT_ASCII = uint32_t(~0x7F); michael@0: michael@0: const char *u8, *u8end; michael@0: aUTF8String.BeginReading(u8); michael@0: aUTF8String.EndReading(u8end); michael@0: michael@0: const char16_t *u16, *u16end; michael@0: aUTF16String.BeginReading(u16); michael@0: aUTF16String.EndReading(u16end); michael@0: michael@0: while (u8 != u8end && u16 != u16end) michael@0: { michael@0: // Cast away the signedness of *u8 to prevent signextension when michael@0: // converting to uint32_t michael@0: uint32_t c8_32 = (uint8_t)*u8; michael@0: michael@0: if (c8_32 & NOT_ASCII) michael@0: { michael@0: bool err; michael@0: c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); michael@0: if (err) michael@0: return INT32_MIN; michael@0: michael@0: uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); michael@0: // The above UTF16CharEnumerator::NextChar() calls can michael@0: // fail, but if it does for anything other than no data to michael@0: // look at (which can't happen here), it returns the michael@0: // Unicode replacement character 0xFFFD for the invalid michael@0: // data they were fed. Ignore that error and treat invalid michael@0: // UTF16 as 0xFFFD. michael@0: // michael@0: // This matches what our UTF16 to UTF8 conversion code michael@0: // does, and thus a UTF8 string that came from an invalid michael@0: // UTF16 string will compare equal to the invalid UTF16 michael@0: // string it came from. Same is true for any other UTF16 michael@0: // string differs only in the invalid part of the string. michael@0: michael@0: if (c8_32 != c16_32) michael@0: return c8_32 < c16_32 ? -1 : 1; michael@0: } michael@0: else michael@0: { michael@0: if (c8_32 != *u16) michael@0: return c8_32 > *u16 ? 1 : -1; michael@0: michael@0: ++u8; michael@0: ++u16; michael@0: } michael@0: } michael@0: michael@0: if (u8 != u8end) michael@0: { michael@0: // We get to the end of the UTF16 string, but no to the end of michael@0: // the UTF8 string. The UTF8 string is longer than the UTF16 michael@0: // string michael@0: michael@0: return 1; michael@0: } michael@0: michael@0: if (u16 != u16end) michael@0: { michael@0: // We get to the end of the UTF8 string, but no to the end of michael@0: // the UTF16 string. The UTF16 string is longer than the UTF8 michael@0: // string michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: // The two strings match. michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: void michael@0: AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) michael@0: { michael@0: NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); michael@0: if (IS_IN_BMP(aSource)) michael@0: { michael@0: aDest.Append(char16_t(aSource)); michael@0: } michael@0: else michael@0: { michael@0: aDest.Append(H_SURROGATE(aSource)); michael@0: aDest.Append(L_SURROGATE(aSource)); michael@0: } michael@0: }