The Tor Browser: xpcom/string/public/nsReadableUtils.h@b8a032363ba2

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     5 // IWYU pragma: private, include "nsString.h"

     7 #ifndef nsReadableUtils_h___

     8 #define nsReadableUtils_h___

    10   /**

    11    * I guess the routines in this file are all mis-named.

    12    * According to our conventions, they should be |NS_xxx|.

    13    */

    15 #include "nsAString.h"

    17 #include "nsTArrayForwardDeclare.h"

    19 inline size_t Distance( const nsReadingIterator<char16_t>& start, const nsReadingIterator<char16_t>& end )

    20   {

    21     return end.get() - start.get();

    22   }

    23 inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end )

    24   {

    25     return end.get() - start.get();

    26   }

    28 void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest );

    29 void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest );

    31 void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest );

    32 void CopyASCIItoUTF16( const char* aSource, nsAString& aDest );

    34 void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest );

    35 void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest );

    37 void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest );

    38 void CopyUTF8toUTF16( const char* aSource, nsAString& aDest );

    40 void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest );

    41 void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest );

    42 bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest,

    43                          const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;

    45 void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest );

    46 void AppendASCIItoUTF16( const char* aSource, nsAString& aDest );

    48 void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest );

    49 bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest,

    50                         const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;

    51 void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );

    52 bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest,

    53                         const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;

    55 void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest );

    56 void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );

    58 #ifdef MOZ_USE_CHAR16_WRAPPER

    59 inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest )

    60   {

    61     return AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest);

    62   }

    63 #endif

    65   /**

    66    * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.

    67    *

    68    * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.

    69    * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.

    70    * This conversion is not well defined; but it reproduces legacy string behavior.

    71    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.

    72    *

    73    * @param aSource a 16-bit wide string

    74    * @return a new |char| buffer you must free with |nsMemory::Free|.

    75    */

    76 char* ToNewCString( const nsAString& aSource );

    79   /**

    80    * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.

    81    *

    82    * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.

    83    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.

    84    *

    85    * @param aSource an 8-bit wide string

    86    * @return a new |char| buffer you must free with |nsMemory::Free|.

    87    */

    88 char* ToNewCString( const nsACString& aSource );

    90   /**

    91    * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.

    92    *

    93    * Allocates and returns a new |char| buffer which you must free with

    94    * |nsMemory::Free|.

    95    * Performs an encoding conversion from a UTF-16 string to a UTF-8 string

    96    * copying |aSource| to your new buffer.

    97    * The new buffer is zero-terminated, but that may not help you if |aSource|

    98    * contains embedded nulls.

    99    *

   100    * @param aSource a UTF-16 string (made of char16_t's)

   101    * @param aUTF8Count the number of 8-bit units that was returned

   102    * @return a new |char| buffer you must free with |nsMemory::Free|.

   103    */

   105 char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr );

   108   /**

   109    * Returns a new |char16_t| buffer containing a zero-terminated copy of

   110    * |aSource|.

   111    *

   112    * Allocates and returns a new |char16_t| buffer which you must free with

   113    * |nsMemory::Free|.

   114    * The new buffer is zero-terminated, but that may not help you if |aSource|

   115    * contains embedded nulls.

   116    *

   117    * @param aSource a UTF-16 string

   118    * @return a new |char16_t| buffer you must free with |nsMemory::Free|.

   119    */

   120 char16_t* ToNewUnicode( const nsAString& aSource );

   123   /**

   124    * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.

   125    *

   126    * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.

   127    * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.

   128    * This conversion is not well defined; but it reproduces legacy string behavior.

   129    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.

   130    *

   131    * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)

   132    * @return a new |char16_t| buffer you must free with |nsMemory::Free|.

   133    */

   134 char16_t* ToNewUnicode( const nsACString& aSource );

   136   /**

   137    * Returns the required length for a char16_t buffer holding

   138    * a copy of aSource, using UTF-8 to UTF-16 conversion.

   139    * The length does NOT include any space for zero-termination.

   140    *

   141    * @param aSource an 8-bit wide string, UTF-8 encoded

   142    * @return length of UTF-16 encoded string copy, not zero-terminated

   143    */

   144 uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource );

   146   /**

   147    * Copies the source string into the specified buffer, converting UTF-8 to

   148    * UTF-16 in the process. The conversion is well defined for valid UTF-8

   149    * strings.

   150    * The copied string will be zero-terminated! Any embedded nulls will be

   151    * copied nonetheless. It is the caller's responsibility to ensure the buffer

   152    * is large enough to hold the string copy plus one char16_t for

   153    * zero-termination!

   154    *

   155    * @see CalcUTF8ToUnicodeLength( const nsACString& )

   156    * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )

   157    *

   158    * @param aSource an 8-bit wide string, UTF-8 encoded

   159    * @param aBuffer the buffer holding the converted string copy

   160    * @param aUTF16Count receiving optionally the number of 16-bit units that

   161    *                    were copied

   162    * @return aBuffer pointer, for convenience

   163    */

   164 char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource,

   165                                 char16_t *aBuffer,

   166                                 uint32_t *aUTF16Count = nullptr );

   168   /**

   169    * Returns a new |char16_t| buffer containing a zero-terminated copy

   170    * of |aSource|.

   171    *

   172    * Allocates and returns a new |char16_t| buffer which you must free with

   173    * |nsMemory::Free|.  Performs an encoding conversion from UTF-8 to UTF-16

   174    * while copying |aSource| to your new buffer.  This conversion is well defined

   175    * for a valid UTF-8 string.  The new buffer is zero-terminated, but that

   176    * may not help you if |aSource| contains embedded nulls.

   177    *

   178    * @param aSource an 8-bit wide string, UTF-8 encoded

   179    * @param aUTF16Count the number of 16-bit units that was returned

   180    * @return a new |char16_t| buffer you must free with |nsMemory::Free|.

   181    *         (UTF-16 encoded)

   182    */

   183 char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr );

   185   /**

   186    * Copies |aLength| 16-bit code units from |aSource|, starting at offset

   187    * |aSrcOffset|, to the |char16_t| buffer |aDest|.

   188    *

   189    * After this operation |aDest| is not null terminated.

   190    *

   191    * @param aSource a UTF-16 string

   192    * @param aSrcOffset start offset in the source string

   193    * @param aDest a |char16_t| buffer

   194    * @param aLength the number of 16-bit code units to copy

   195    * @return pointer to destination buffer - identical to |aDest|

   196    */

   197 char16_t* CopyUnicodeTo( const nsAString& aSource,

   198                                  uint32_t aSrcOffset,

   199                                  char16_t* aDest,

   200                                  uint32_t aLength );

   203   /**

   204    * Copies 16-bit characters between iterators |aSrcStart| and

   205    * |aSrcEnd| to the writable string |aDest|. Similar to the

   206    * |nsString::Mid| method.

   207    *

   208    * After this operation |aDest| is not null terminated.

   209    *

   210    * @param aSrcStart start source iterator

   211    * @param aSrcEnd end source iterator

   212    * @param aDest destination for the copy

   213    */

   214 void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,

   215                            const nsAString::const_iterator& aSrcEnd,

   216                            nsAString& aDest );

   218   /**

   219    * Appends 16-bit characters between iterators |aSrcStart| and

   220    * |aSrcEnd| to the writable string |aDest|.

   221    *

   222    * After this operation |aDest| is not null terminated.

   223    *

   224    * @param aSrcStart start source iterator

   225    * @param aSrcEnd end source iterator

   226    * @param aDest destination for the copy

   227    */

   228 void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,

   229                              const nsAString::const_iterator& aSrcEnd,

   230                              nsAString& aDest );

   232   /**

   233    * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).

   234    *

   235    * @param aString a 16-bit wide string to scan

   236    */

   237 bool IsASCII( const nsAString& aString );

   239   /**

   240    * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).

   241    *

   242    * @param aString an 8-bit wide string to scan

   243    */

   244 bool IsASCII( const nsACString& aString );

   246   /**

   247    * Returns |true| if |aString| is a valid UTF-8 string.

   248    * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.

   249    * It is mainly written to replace, and is roughly equivalent to,

   250    *

   251    *    str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))

   252    *

   253    * (see bug 191541)

   254    * As such,  it does not check for non-UTF-8 7bit encodings such as

   255    * ISO-2022-JP and HZ.

   256    *

   257    * It rejects sequences with the following errors:

   258    *

   259    * byte sequences that cannot be decoded into characters according to

   260    *   UTF-8's rules (including cases where the input is part of a valid

   261    *   UTF-8 sequence but starts or ends mid-character)

   262    * overlong sequences (i.e., cases where a character was encoded

   263    *   non-canonically by using more bytes than necessary)

   264    * surrogate codepoints (i.e., the codepoints reserved for

   265    *   representing astral characters in UTF-16)

   266    * codepoints above the unicode range (i.e., outside the first 17

   267    *   planes; higher than U+10FFFF), in accordance with

   268    *   http://tools.ietf.org/html/rfc3629

   269    * when aRejectNonChar is true (the default), any codepoint whose low

   270    *   16 bits are 0xFFFE or 0xFFFF

   272    *

   273    * @param aString an 8-bit wide string to scan

   274    * @param aRejectNonChar a boolean to control the rejection of utf-8

   275    *        non characters

   276    */

   277 bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true );

   279 bool ParseString(const nsACString& aAstring, char aDelimiter,

   280                           nsTArray<nsCString>& aArray);

   282   /**

   283    * Converts case in place in the argument string.

   284    */

   285 void ToUpperCase( nsACString& );

   287 void ToLowerCase( nsACString& );

   289 void ToUpperCase( nsCSubstring& );

   291 void ToLowerCase( nsCSubstring& );

   293   /**

   294    * Converts case from string aSource to aDest.

   295    */

   296 void ToUpperCase( const nsACString& aSource, nsACString& aDest );

   298 void ToLowerCase( const nsACString& aSource, nsACString& aDest );

   300   /**

   301    * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.

   302    *

   303    * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to

   304    * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.

   305    *

   306    * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.

   307    * If we need something faster, then we can implement that later.

   308    */

   310 bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );

   311 bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );

   313 /* sometimes we don't care about where the string was, just that we

   314  * found it or not */

   315 inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() )

   316 {

   317   nsAString::const_iterator start, end;

   318   aSource.BeginReading(start);

   319   aSource.EndReading(end);

   320   return FindInReadable(aPattern, start, end, compare);

   321 }

   323 inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() )

   324 {

   325   nsACString::const_iterator start, end;

   326   aSource.BeginReading(start);

   327   aSource.EndReading(end);

   328   return FindInReadable(aPattern, start, end, compare);

   329 }

   332 bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& );

   334   /**

   335    * Finds the rightmost occurrence of |aPattern|

   336    * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to

   337    * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.

   338    *

   339    */

   340 bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );

   341 bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );

   343    /**

   344    * Finds the leftmost occurrence of |aChar|, if any in the range

   345    * |aSearchStart|..|aSearchEnd|.

   346    *

   347    * Returns |true| if a match was found, and adjusts |aSearchStart| to

   348    * point to the match.  If no match was found, returns |false| and

   349    * makes |aSearchStart == aSearchEnd|.

   350    */

   351 bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd );

   352 bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd );

   354     /**

   355     * Finds the number of occurrences of |aChar| in the string |aStr|

   356     */

   357 uint32_t CountCharInReadable( const nsAString& aStr,

   358                                      char16_t aChar );

   359 uint32_t CountCharInReadable( const nsACString& aStr,

   360                                      char aChar );

   362 bool

   363 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,

   364                   const nsStringComparator& aComparator =

   365                                               nsDefaultStringComparator() );

   366 bool

   367 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,

   368                   const nsCStringComparator& aComparator =

   369                                                nsDefaultCStringComparator() );

   370 bool

   371 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,

   372                 const nsStringComparator& aComparator =

   373                                             nsDefaultStringComparator() );

   374 bool

   375 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,

   376                 const nsCStringComparator& aComparator =

   377                                              nsDefaultCStringComparator() );

   379 const nsAFlatString& EmptyString();

   380 const nsAFlatCString& EmptyCString();

   382 const nsAFlatString& NullString();

   383 const nsAFlatCString& NullCString();

   385    /**

   386    * Compare a UTF-8 string to a UTF-16 string.

   387    *

   388    * Returns 0 if the strings are equal, -1 if aUTF8String is less

   389    * than aUTF16String, and 1 in the reverse case.  In case of fatal

   390    * error (eg the strings are not valid UTF8 and UTF16 respectively),

   391    * this method will return INT32_MIN.

   392    */

   393 int32_t

   394 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,

   395                    const nsASingleFragmentString& aUTF16String);

   397 void

   398 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);

   400 template<class T>

   401 inline bool EnsureStringLength(T& aStr, uint32_t aLen)

   402 {

   403     aStr.SetLength(aLen);

   404     return (aStr.Length() == aLen);

   405 }

   407 #endif // !defined(nsReadableUtils_h___)

The Tor Browser / file revision

xpcom/string/public/nsReadableUtils.h@b8a032363ba2

xpcom/string/public/nsReadableUtils.h