xpcom/string/public/nsReadableUtils.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 // IWYU pragma: private, include "nsString.h"
     7 #ifndef nsReadableUtils_h___
     8 #define nsReadableUtils_h___
    10   /**
    11    * I guess the routines in this file are all mis-named.
    12    * According to our conventions, they should be |NS_xxx|.
    13    */
    15 #include "nsAString.h"
    17 #include "nsTArrayForwardDeclare.h"
    19 inline size_t Distance( const nsReadingIterator<char16_t>& start, const nsReadingIterator<char16_t>& end )
    20   {
    21     return end.get() - start.get();
    22   }
    23 inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end )
    24   {
    25     return end.get() - start.get();
    26   }
    28 void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest );
    29 void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
    31 void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest );
    32 void CopyASCIItoUTF16( const char* aSource, nsAString& aDest );
    34 void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
    35 void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
    37 void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest );
    38 void CopyUTF8toUTF16( const char* aSource, nsAString& aDest );
    40 void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest );
    41 void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
    42 bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest,
    43                          const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;
    45 void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest );
    46 void AppendASCIItoUTF16( const char* aSource, nsAString& aDest );
    48 void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
    49 bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest,
    50                         const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;
    51 void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
    52 bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest,
    53                         const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT;
    55 void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest );
    56 void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );
    #ifdef MOZ_USE_CHAR16_WRAPPER
    /**
     * Overload for |char16ptr_t| sources; only compiled when
     * MOZ_USE_CHAR16_WRAPPER is defined.
     *
     * Casts |aSource| to |const char16_t*| and hands it, together with
     * |aDest|, to the |const char16_t*| overload of |AppendUTF16toUTF8|.
     *
     * @param aSource the source, wrapped as a |char16ptr_t|
     * @param aDest   the 8-bit string passed through to the pointer overload
     */
    inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest )
    {
      const char16_t* const rawSource = static_cast<const char16_t*>(aSource);
      AppendUTF16toUTF8(rawSource, aDest);
    }
    #endif
    65   /**
    66    * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
    67    *
    68    * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
    69    * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
    70    * This conversion is not well defined; but it reproduces legacy string behavior.
    71    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
    72    *
    73    * @param aSource a 16-bit wide string
    74    * @return a new |char| buffer you must free with |nsMemory::Free|.
    75    */
    76 char* ToNewCString( const nsAString& aSource );
    79   /**
    80    * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
    81    *
    82    * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
    83    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
    84    *
    85    * @param aSource an 8-bit wide string
    86    * @return a new |char| buffer you must free with |nsMemory::Free|.
    87    */
    88 char* ToNewCString( const nsACString& aSource );
    90   /**
    91    * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
    92    *
    93    * Allocates and returns a new |char| buffer which you must free with 
    94    * |nsMemory::Free|.
    95    * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
    96    * copying |aSource| to your new buffer.
    97    * The new buffer is zero-terminated, but that may not help you if |aSource| 
    98    * contains embedded nulls.
    99    *
   100    * @param aSource a UTF-16 string (made of char16_t's)
   101    * @param aUTF8Count the number of 8-bit units that was returned
   102    * @return a new |char| buffer you must free with |nsMemory::Free|.
   103    */
   105 char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr );
   108   /**
   109    * Returns a new |char16_t| buffer containing a zero-terminated copy of 
   110    * |aSource|.
   111    *
   112    * Allocates and returns a new |char16_t| buffer which you must free with 
   113    * |nsMemory::Free|.
   114    * The new buffer is zero-terminated, but that may not help you if |aSource| 
   115    * contains embedded nulls.
   116    *
   117    * @param aSource a UTF-16 string
   118    * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
   119    */
   120 char16_t* ToNewUnicode( const nsAString& aSource );
   123   /**
   124    * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
   125    *
   126    * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.
   127    * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
   128    * This conversion is not well defined; but it reproduces legacy string behavior.
   129    * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
   130    *
   131    * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
   132    * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
   133    */
   134 char16_t* ToNewUnicode( const nsACString& aSource );
   136   /**
   137    * Returns the required length for a char16_t buffer holding
   138    * a copy of aSource, using UTF-8 to UTF-16 conversion.
   139    * The length does NOT include any space for zero-termination.
   140    *
   141    * @param aSource an 8-bit wide string, UTF-8 encoded
   142    * @return length of UTF-16 encoded string copy, not zero-terminated
   143    */
   144 uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource );
   146   /**
   147    * Copies the source string into the specified buffer, converting UTF-8 to
   148    * UTF-16 in the process. The conversion is well defined for valid UTF-8
   149    * strings.
   150    * The copied string will be zero-terminated! Any embedded nulls will be
   151    * copied nonetheless. It is the caller's responsibility to ensure the buffer
   152    * is large enough to hold the string copy plus one char16_t for
   153    * zero-termination!
   154    *
   155    * @see CalcUTF8ToUnicodeLength( const nsACString& )
   156    * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
   157    *
   158    * @param aSource an 8-bit wide string, UTF-8 encoded
   159    * @param aBuffer the buffer holding the converted string copy
   160    * @param aUTF16Count optionally receives the number of 16-bit units that
   161    *                    were copied
   162    * @return aBuffer pointer, for convenience 
   163    */
   164 char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource,
   165                                 char16_t *aBuffer,
   166                                 uint32_t *aUTF16Count = nullptr );
   168   /**
   169    * Returns a new |char16_t| buffer containing a zero-terminated copy
   170    * of |aSource|.
   171    *
   172    * Allocates and returns a new |char16_t| buffer which you must free with
   173    * |nsMemory::Free|.  Performs an encoding conversion from UTF-8 to UTF-16 
   174    * while copying |aSource| to your new buffer.  This conversion is well defined
   175    * for a valid UTF-8 string.  The new buffer is zero-terminated, but that 
   176    * may not help you if |aSource| contains embedded nulls.
   177    *
   178    * @param aSource an 8-bit wide string, UTF-8 encoded
   179    * @param aUTF16Count the number of 16-bit units that was returned
   180    * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
   181    *         (UTF-16 encoded)
   182    */
   183 char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr );
   185   /**
   186    * Copies |aLength| 16-bit code units from |aSource|, beginning at offset
   187    * |aSrcOffset|, to the |char16_t| buffer |aDest|.
   188    *
   189    * After this operation |aDest| is not null terminated.
   190    *
   191    * @param aSource a UTF-16 string
   192    * @param aSrcOffset start offset in the source string
   193    * @param aDest a |char16_t| buffer
   194    * @param aLength the number of 16-bit code units to copy
   195    * @return pointer to destination buffer - identical to |aDest|
   196    */
   197 char16_t* CopyUnicodeTo( const nsAString& aSource,
   198                                  uint32_t aSrcOffset,
   199                                  char16_t* aDest,
   200                                  uint32_t aLength );
   203   /**
   204    * Copies 16-bit characters between iterators |aSrcStart| and
   205    * |aSrcEnd| to the writable string |aDest|. Similar to the
   206    * |nsString::Mid| method.
   207    *
   208    * After this operation |aDest| is not null terminated.
   209    *
   210    * @param aSrcStart start source iterator
   211    * @param aSrcEnd end source iterator
   212    * @param aDest destination for the copy
   213    */
   214 void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
   215                            const nsAString::const_iterator& aSrcEnd,
   216                            nsAString& aDest );
   218   /**
   219    * Appends 16-bit characters between iterators |aSrcStart| and
   220    * |aSrcEnd| to the writable string |aDest|. 
   221    *
   222    * After this operation |aDest| is not null terminated.
   223    *
   224    * @param aSrcStart start source iterator
   225    * @param aSrcEnd end source iterator
   226    * @param aDest destination for the copy
   227    */
   228 void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
   229                              const nsAString::const_iterator& aSrcEnd,
   230                              nsAString& aDest );
   232   /**
   233    * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range 0x00 through 0x7F inclusive.
   234    *
   235    * @param aString a 16-bit wide string to scan
   236    */
   237 bool IsASCII( const nsAString& aString );
   239   /**
   240    * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range 0x00 through 0x7F inclusive.
   241    *
   242    * @param aString an 8-bit wide string to scan
   243    */
   244 bool IsASCII( const nsACString& aString );
   246   /**
   247    * Returns |true| if |aString| is a valid UTF-8 string.
   248    * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
   249    * It is mainly written to replace, and is roughly equivalent to,
   250    *
   251    *    str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
   252    *
   253    * (see bug 191541)
   254    * As such,  it does not check for non-UTF-8 7bit encodings such as 
   255    * ISO-2022-JP and HZ. 
   256    *
   257    * It rejects sequences with the following errors:
   258    *
   259    * byte sequences that cannot be decoded into characters according to
   260    *   UTF-8's rules (including cases where the input is part of a valid
   261    *   UTF-8 sequence but starts or ends mid-character)
   262    * overlong sequences (i.e., cases where a character was encoded
   263    *   non-canonically by using more bytes than necessary)
   264    * surrogate codepoints (i.e., the codepoints reserved for
   265    *   representing astral characters in UTF-16)
   266    * codepoints above the unicode range (i.e., outside the first 17
   267    *   planes; higher than U+10FFFF), in accordance with
   268    *   http://tools.ietf.org/html/rfc3629
   269    * when aRejectNonChar is true (the default), any codepoint whose low
   270    *   16 bits are 0xFFFE or 0xFFFF
   272    *
   273    * @param aString an 8-bit wide string to scan
   274    * @param aRejectNonChar a boolean to control the rejection of utf-8
   275    *        non characters
   276    */
   277 bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true );
   279 bool ParseString(const nsACString& aAstring, char aDelimiter, 
   280                           nsTArray<nsCString>& aArray);
   282   /**
   283    * Converts case in place in the argument string.
   284    */
   285 void ToUpperCase( nsACString& );
   287 void ToLowerCase( nsACString& );
   289 void ToUpperCase( nsCSubstring& );
   291 void ToLowerCase( nsCSubstring& );
   293   /**
   294    * Converts case from string aSource to aDest.
   295    */
   296 void ToUpperCase( const nsACString& aSource, nsACString& aDest );
   298 void ToLowerCase( const nsACString& aSource, nsACString& aDest );
   300   /**
   301    * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
   302    *
   303    * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
   304    * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
   305    *
   306    * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
   307    * If we need something faster, then we can implement that later.
   308    */
   310 bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
   311 bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
   313 /* sometimes we don't care about where the string was, just that we
   314  * found it or not */
   315 inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() )
   316 {
   317   nsAString::const_iterator start, end;
   318   aSource.BeginReading(start);
   319   aSource.EndReading(end);
   320   return FindInReadable(aPattern, start, end, compare);
   321 }
   323 inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() )
   324 {
   325   nsACString::const_iterator start, end;
   326   aSource.BeginReading(start);
   327   aSource.EndReading(end);
   328   return FindInReadable(aPattern, start, end, compare);
   329 }
   332 bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& );
   334   /**
   335    * Finds the rightmost occurrence of |aPattern| 
   336    * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
   337    * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
   338    *
   339    */
   340 bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
   341 bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
   343    /**
   344    * Finds the leftmost occurrence of |aChar|, if any in the range 
   345    * |aSearchStart|..|aSearchEnd|.
   346    *
   347    * Returns |true| if a match was found, and adjusts |aSearchStart| to
   348    * point to the match.  If no match was found, returns |false| and 
   349    * makes |aSearchStart == aSearchEnd|.
   350    */
   351 bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd );
   352 bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd );
   354     /**
   355     * Finds the number of occurrences of |aChar| in the string |aStr|
   356     */
   357 uint32_t CountCharInReadable( const nsAString& aStr,
   358                                      char16_t aChar );
   359 uint32_t CountCharInReadable( const nsACString& aStr,
   360                                      char aChar );
   362 bool
   363 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
   364                   const nsStringComparator& aComparator =
   365                                               nsDefaultStringComparator() );
   366 bool
   367 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
   368                   const nsCStringComparator& aComparator =
   369                                                nsDefaultCStringComparator() );
   370 bool
   371 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
   372                 const nsStringComparator& aComparator =
   373                                             nsDefaultStringComparator() );
   374 bool
   375 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
   376                 const nsCStringComparator& aComparator =
   377                                              nsDefaultCStringComparator() );
   379 const nsAFlatString& EmptyString();
   380 const nsAFlatCString& EmptyCString();
   382 const nsAFlatString& NullString();
   383 const nsAFlatCString& NullCString();
   385    /**
   386    * Compare a UTF-8 string to a UTF-16 string.
   387    *
   388    * Returns 0 if the strings are equal, -1 if aUTF8String is less
   389    * than aUTF16String, and 1 in the reverse case.  In case of fatal
   390    * error (eg the strings are not valid UTF8 and UTF16 respectively),
   391    * this method will return INT32_MIN.
   392    */
   393 int32_t
   394 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
   395                    const nsASingleFragmentString& aUTF16String);
   397 void
   398 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
   /**
    * Requests that |aStr| be resized to |aLen| and reports whether the
    * resize took effect.
    *
    * Calls |aStr.SetLength(aLen)| and then compares |aStr.Length()| with
    * |aLen|.
    *
    * NOTE(review): presumably |SetLength| can leave the length unchanged
    * when allocation fails, which is what the check detects; confirm
    * against the string implementation.
    *
    * @param aStr any object providing |SetLength| and |Length|
    * @param aLen the requested length
    * @return |true| if |aStr.Length()| equals |aLen| after the call
    */
   template<class T>
   inline bool EnsureStringLength(T& aStr, uint32_t aLen)
   {
     aStr.SetLength(aLen);

     const bool lengthMatches = (aStr.Length() == aLen);
     return lengthMatches;
   }
   407 #endif // !defined(nsReadableUtils_h___)

mercurial