michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: // IWYU pragma: private, include "nsString.h" michael@0: michael@0: #ifndef nsReadableUtils_h___ michael@0: #define nsReadableUtils_h___ michael@0: michael@0: /** michael@0: * I guess all the routines in this file are all mis-named. michael@0: * According to our conventions, they should be |NS_xxx|. michael@0: */ michael@0: michael@0: #include "nsAString.h" michael@0: michael@0: #include "nsTArrayForwardDeclare.h" michael@0: michael@0: inline size_t Distance( const nsReadingIterator& start, const nsReadingIterator& end ) michael@0: { michael@0: return end.get() - start.get(); michael@0: } michael@0: inline size_t Distance( const nsReadingIterator& start, const nsReadingIterator& end ) michael@0: { michael@0: return end.get() - start.get(); michael@0: } michael@0: michael@0: void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ); michael@0: void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); michael@0: michael@0: void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ); michael@0: void CopyASCIItoUTF16( const char* aSource, nsAString& aDest ); michael@0: michael@0: void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); michael@0: void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); michael@0: michael@0: void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); michael@0: void CopyUTF8toUTF16( const char* aSource, nsAString& aDest ); michael@0: michael@0: void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ); michael@0: void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); michael@0: bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, michael@0: const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; michael@0: michael@0: void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ); michael@0: void AppendASCIItoUTF16( const char* aSource, nsAString& aDest ); michael@0: michael@0: void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); michael@0: bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, michael@0: const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; michael@0: void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); michael@0: bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, michael@0: const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; michael@0: michael@0: void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); michael@0: void AppendUTF8toUTF16( const char* aSource, nsAString& aDest ); michael@0: michael@0: #ifdef MOZ_USE_CHAR16_WRAPPER michael@0: inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest ) michael@0: { michael@0: return AppendUTF16toUTF8(static_cast(aSource), aDest); michael@0: } michael@0: #endif michael@0: michael@0: /** michael@0: * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. michael@0: * michael@0: * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. michael@0: * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer. michael@0: * This conversion is not well defined; but it reproduces legacy string behavior. michael@0: * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. michael@0: * michael@0: * @param aSource a 16-bit wide string michael@0: * @return a new |char| buffer you must free with |nsMemory::Free|. michael@0: */ michael@0: char* ToNewCString( const nsAString& aSource ); michael@0: michael@0: michael@0: /** michael@0: * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. michael@0: * michael@0: * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. michael@0: * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. michael@0: * michael@0: * @param aSource an 8-bit wide string michael@0: * @return a new |char| buffer you must free with |nsMemory::Free|. michael@0: */ michael@0: char* ToNewCString( const nsACString& aSource ); michael@0: michael@0: /** michael@0: * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. michael@0: * michael@0: * Allocates and returns a new |char| buffer which you must free with michael@0: * |nsMemory::Free|. michael@0: * Performs an encoding conversion from a UTF-16 string to a UTF-8 string michael@0: * copying |aSource| to your new buffer. michael@0: * The new buffer is zero-terminated, but that may not help you if |aSource| michael@0: * contains embedded nulls. michael@0: * michael@0: * @param aSource a UTF-16 string (made of char16_t's) michael@0: * @param aUTF8Count the number of 8-bit units that was returned michael@0: * @return a new |char| buffer you must free with |nsMemory::Free|. michael@0: */ michael@0: michael@0: char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr ); michael@0: michael@0: michael@0: /** michael@0: * Returns a new |char16_t| buffer containing a zero-terminated copy of michael@0: * |aSource|. michael@0: * michael@0: * Allocates and returns a new |char16_t| buffer which you must free with michael@0: * |nsMemory::Free|. michael@0: * The new buffer is zero-terminated, but that may not help you if |aSource| michael@0: * contains embedded nulls. michael@0: * michael@0: * @param aSource a UTF-16 string michael@0: * @return a new |char16_t| buffer you must free with |nsMemory::Free|. michael@0: */ michael@0: char16_t* ToNewUnicode( const nsAString& aSource ); michael@0: michael@0: michael@0: /** michael@0: * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|. michael@0: * michael@0: * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|. michael@0: * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer. michael@0: * This conversion is not well defined; but it reproduces legacy string behavior. michael@0: * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. michael@0: * michael@0: * @param aSource an 8-bit wide string (a C-string, NOT UTF-8) michael@0: * @return a new |char16_t| buffer you must free with |nsMemory::Free|. michael@0: */ michael@0: char16_t* ToNewUnicode( const nsACString& aSource ); michael@0: michael@0: /** michael@0: * Returns the required length for a char16_t buffer holding michael@0: * a copy of aSource, using UTF-8 to UTF-16 conversion. michael@0: * The length does NOT include any space for zero-termination. michael@0: * michael@0: * @param aSource an 8-bit wide string, UTF-8 encoded michael@0: * @return length of UTF-16 encoded string copy, not zero-terminated michael@0: */ michael@0: uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource ); michael@0: michael@0: /** michael@0: * Copies the source string into the specified buffer, converting UTF-8 to michael@0: * UTF-16 in the process. The conversion is well defined for valid UTF-8 michael@0: * strings. michael@0: * The copied string will be zero-terminated! Any embedded nulls will be michael@0: * copied nonetheless. It is the caller's responsiblity to ensure the buffer michael@0: * is large enough to hold the string copy plus one char16_t for michael@0: * zero-termination! michael@0: * michael@0: * @see CalcUTF8ToUnicodeLength( const nsACString& ) michael@0: * @see UTF8ToNewUnicode( const nsACString&, uint32_t* ) michael@0: * michael@0: * @param aSource an 8-bit wide string, UTF-8 encoded michael@0: * @param aBuffer the buffer holding the converted string copy michael@0: * @param aUTF16Count receiving optionally the number of 16-bit units that michael@0: * were copied michael@0: * @return aBuffer pointer, for convenience michael@0: */ michael@0: char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource, michael@0: char16_t *aBuffer, michael@0: uint32_t *aUTF16Count = nullptr ); michael@0: michael@0: /** michael@0: * Returns a new |char16_t| buffer containing a zero-terminated copy michael@0: * of |aSource|. michael@0: * michael@0: * Allocates and returns a new |char| buffer which you must free with michael@0: * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16 michael@0: * while copying |aSource| to your new buffer. This conversion is well defined michael@0: * for a valid UTF-8 string. The new buffer is zero-terminated, but that michael@0: * may not help you if |aSource| contains embedded nulls. michael@0: * michael@0: * @param aSource an 8-bit wide string, UTF-8 encoded michael@0: * @param aUTF16Count the number of 16-bit units that was returned michael@0: * @return a new |char16_t| buffer you must free with |nsMemory::Free|. michael@0: * (UTF-16 encoded) michael@0: */ michael@0: char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr ); michael@0: michael@0: /** michael@0: * Copies |aLength| 16-bit code units from the start of |aSource| to the michael@0: * |char16_t| buffer |aDest|. michael@0: * michael@0: * After this operation |aDest| is not null terminated. michael@0: * michael@0: * @param aSource a UTF-16 string michael@0: * @param aSrcOffset start offset in the source string michael@0: * @param aDest a |char16_t| buffer michael@0: * @param aLength the number of 16-bit code units to copy michael@0: * @return pointer to destination buffer - identical to |aDest| michael@0: */ michael@0: char16_t* CopyUnicodeTo( const nsAString& aSource, michael@0: uint32_t aSrcOffset, michael@0: char16_t* aDest, michael@0: uint32_t aLength ); michael@0: michael@0: michael@0: /** michael@0: * Copies 16-bit characters between iterators |aSrcStart| and michael@0: * |aSrcEnd| to the writable string |aDest|. Similar to the michael@0: * |nsString::Mid| method. michael@0: * michael@0: * After this operation |aDest| is not null terminated. michael@0: * michael@0: * @param aSrcStart start source iterator michael@0: * @param aSrcEnd end source iterator michael@0: * @param aDest destination for the copy michael@0: */ michael@0: void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, michael@0: const nsAString::const_iterator& aSrcEnd, michael@0: nsAString& aDest ); michael@0: michael@0: /** michael@0: * Appends 16-bit characters between iterators |aSrcStart| and michael@0: * |aSrcEnd| to the writable string |aDest|. michael@0: * michael@0: * After this operation |aDest| is not null terminated. michael@0: * michael@0: * @param aSrcStart start source iterator michael@0: * @param aSrcEnd end source iterator michael@0: * @param aDest destination for the copy michael@0: */ michael@0: void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, michael@0: const nsAString::const_iterator& aSrcEnd, michael@0: nsAString& aDest ); michael@0: michael@0: /** michael@0: * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). michael@0: * michael@0: * @param aString a 16-bit wide string to scan michael@0: */ michael@0: bool IsASCII( const nsAString& aString ); michael@0: michael@0: /** michael@0: * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). michael@0: * michael@0: * @param aString a 8-bit wide string to scan michael@0: */ michael@0: bool IsASCII( const nsACString& aString ); michael@0: michael@0: /** michael@0: * Returns |true| if |aString| is a valid UTF-8 string. michael@0: * XXX This is not bullet-proof and nor an all-purpose UTF-8 validator. michael@0: * It is mainly written to replace and roughly equivalent to michael@0: * michael@0: * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str))) michael@0: * michael@0: * (see bug 191541) michael@0: * As such, it does not check for non-UTF-8 7bit encodings such as michael@0: * ISO-2022-JP and HZ. michael@0: * michael@0: * It rejects sequences with the following errors: michael@0: * michael@0: * byte sequences that cannot be decoded into characters according to michael@0: * UTF-8's rules (including cases where the input is part of a valid michael@0: * UTF-8 sequence but starts or ends mid-character) michael@0: * overlong sequences (i.e., cases where a character was encoded michael@0: * non-canonically by using more bytes than necessary) michael@0: * surrogate codepoints (i.e., the codepoints reserved for michael@0: representing astral characters in UTF-16) michael@0: * codepoints above the unicode range (i.e., outside the first 17 michael@0: * planes; higher than U+10FFFF), in accordance with michael@0: * http://tools.ietf.org/html/rfc3629 michael@0: * when aRejectNonChar is true (the default), any codepoint whose low michael@0: * 16 bits are 0xFFFE or 0xFFFF michael@0: michael@0: * michael@0: * @param aString an 8-bit wide string to scan michael@0: * @param aRejectNonChar a boolean to control the rejection of utf-8 michael@0: * non characters michael@0: */ michael@0: bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true ); michael@0: michael@0: bool ParseString(const nsACString& aAstring, char aDelimiter, michael@0: nsTArray& aArray); michael@0: michael@0: /** michael@0: * Converts case in place in the argument string. michael@0: */ michael@0: void ToUpperCase( nsACString& ); michael@0: michael@0: void ToLowerCase( nsACString& ); michael@0: michael@0: void ToUpperCase( nsCSubstring& ); michael@0: michael@0: void ToLowerCase( nsCSubstring& ); michael@0: michael@0: /** michael@0: * Converts case from string aSource to aDest. michael@0: */ michael@0: void ToUpperCase( const nsACString& aSource, nsACString& aDest ); michael@0: michael@0: void ToLowerCase( const nsACString& aSource, nsACString& aDest ); michael@0: michael@0: /** michael@0: * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|. michael@0: * michael@0: * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to michael@0: * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. michael@0: * michael@0: * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|. michael@0: * If we need something faster, then we can implement that later. michael@0: */ michael@0: michael@0: bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() ); michael@0: bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() ); michael@0: michael@0: /* sometimes we don't care about where the string was, just that we michael@0: * found it or not */ michael@0: inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() ) michael@0: { michael@0: nsAString::const_iterator start, end; michael@0: aSource.BeginReading(start); michael@0: aSource.EndReading(end); michael@0: return FindInReadable(aPattern, start, end, compare); michael@0: } michael@0: michael@0: inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() ) michael@0: { michael@0: nsACString::const_iterator start, end; michael@0: aSource.BeginReading(start); michael@0: aSource.EndReading(end); michael@0: return FindInReadable(aPattern, start, end, compare); michael@0: } michael@0: michael@0: michael@0: bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& ); michael@0: michael@0: /** michael@0: * Finds the rightmost occurrence of |aPattern| michael@0: * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to michael@0: * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. michael@0: * michael@0: */ michael@0: bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() ); michael@0: bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() ); michael@0: michael@0: /** michael@0: * Finds the leftmost occurrence of |aChar|, if any in the range michael@0: * |aSearchStart|..|aSearchEnd|. michael@0: * michael@0: * Returns |true| if a match was found, and adjusts |aSearchStart| to michael@0: * point to the match. If no match was found, returns |false| and michael@0: * makes |aSearchStart == aSearchEnd|. michael@0: */ michael@0: bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ); michael@0: bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ); michael@0: michael@0: /** michael@0: * Finds the number of occurences of |aChar| in the string |aStr| michael@0: */ michael@0: uint32_t CountCharInReadable( const nsAString& aStr, michael@0: char16_t aChar ); michael@0: uint32_t CountCharInReadable( const nsACString& aStr, michael@0: char aChar ); michael@0: michael@0: bool michael@0: StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, michael@0: const nsStringComparator& aComparator = michael@0: nsDefaultStringComparator() ); michael@0: bool michael@0: StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, michael@0: const nsCStringComparator& aComparator = michael@0: nsDefaultCStringComparator() ); michael@0: bool michael@0: StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, michael@0: const nsStringComparator& aComparator = michael@0: nsDefaultStringComparator() ); michael@0: bool michael@0: StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, michael@0: const nsCStringComparator& aComparator = michael@0: nsDefaultCStringComparator() ); michael@0: michael@0: const nsAFlatString& EmptyString(); michael@0: const nsAFlatCString& EmptyCString(); michael@0: michael@0: const nsAFlatString& NullString(); michael@0: const nsAFlatCString& NullCString(); michael@0: michael@0: /** michael@0: * Compare a UTF-8 string to an UTF-16 string. michael@0: * michael@0: * Returns 0 if the strings are equal, -1 if aUTF8String is less michael@0: * than aUTF16Count, and 1 in the reverse case. In case of fatal michael@0: * error (eg the strings are not valid UTF8 and UTF16 respectively), michael@0: * this method will return INT32_MIN. michael@0: */ michael@0: int32_t michael@0: CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, michael@0: const nsASingleFragmentString& aUTF16String); michael@0: michael@0: void michael@0: AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); michael@0: michael@0: template michael@0: inline bool EnsureStringLength(T& aStr, uint32_t aLen) michael@0: { michael@0: aStr.SetLength(aLen); michael@0: return (aStr.Length() == aLen); michael@0: } michael@0: michael@0: #endif // !defined(nsReadableUtils_h___)