The Tor Browser: xpcom/string/src/nsReadableUtils.cpp@6474c204b198

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     6 #include "nsReadableUtils.h"

     8 #include "nsMemory.h"

     9 #include "nsString.h"

    10 #include "nsTArray.h"

    11 #include "nsUTF8Utils.h"

    13 void

    14 LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )

    15   {

    16     aDest.Truncate();

    17     LossyAppendUTF16toASCII(aSource, aDest);

    18   }

    20 void

    21 CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )

    22   {

    23     aDest.Truncate();

    24     AppendASCIItoUTF16(aSource, aDest);

    25   }

    27 void

    28 LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest )

    29   {

    30     aDest.Truncate();

    31     if (aSource) {

    32       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);

    33     }

    34   }

    36 void

    37 CopyASCIItoUTF16( const char* aSource, nsAString& aDest )

    38   {

    39     aDest.Truncate();

    40     if (aSource) {

    41       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);

    42     }

    43   }

    45 void

    46 CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest )

    47   {

    48     aDest.Truncate();

    49     AppendUTF16toUTF8(aSource, aDest);

    50   }

    52 void

    53 CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest )

    54   {

    55     aDest.Truncate();

    56     AppendUTF8toUTF16(aSource, aDest);

    57   }

    59 void

    60 CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest )

    61   {

    62     aDest.Truncate();

    63     AppendUTF16toUTF8(aSource, aDest);

    64   }

    66 void

    67 CopyUTF8toUTF16( const char* aSource, nsAString& aDest )

    68   {

    69     aDest.Truncate();

    70     AppendUTF8toUTF16(aSource, aDest);

    71   }

    73 void

    74 LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )

    75   {

    76     uint32_t old_dest_length = aDest.Length();

    77     aDest.SetLength(old_dest_length + aSource.Length());

    79     nsAString::const_iterator fromBegin, fromEnd;

    81     nsACString::iterator dest;

    82     aDest.BeginWriting(dest);

    84     dest.advance(old_dest_length);

    86     // right now, this won't work on multi-fragment destinations

    87     LossyConvertEncoding16to8 converter(dest.get());

    89     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);

    90   }

    92 void

    93 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )

    94   {

    95     if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) {

    96       NS_ABORT_OOM(aDest.Length() + aSource.Length());

    97     }

    98   }

   100 bool

   101 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest,

   102                     const mozilla::fallible_t& )

   103   {

   104     uint32_t old_dest_length = aDest.Length();

   105     if (!aDest.SetLength(old_dest_length + aSource.Length(), mozilla::fallible_t())) {

   106       return false;

   107     }

   109     nsACString::const_iterator fromBegin, fromEnd;

   111     nsAString::iterator dest;

   112     aDest.BeginWriting(dest);

   114     dest.advance(old_dest_length);

   116       // right now, this won't work on multi-fragment destinations

   117     LossyConvertEncoding8to16 converter(dest.get());

   119     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);

   120     return true;

   121   }

   123 void

   124 LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest )

   125   {

   126     if (aSource) {

   127       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);

   128     }

   129   }

   131 void

   132 AppendASCIItoUTF16( const char* aSource, nsAString& aDest )

   133   {

   134     if (aSource) {

   135       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);

   136     }

   137   }

   139 void

   140 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )

   141 {

   142   if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) {

   143     NS_ABORT_OOM(aDest.Length() + aSource.Length());

   144   }

   145 }

   147 bool

   148 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest,

   149                    const mozilla::fallible_t& )

   150   {

   151     nsAString::const_iterator source_start, source_end;

   152     CalculateUTF8Size calculator;

   153     copy_string(aSource.BeginReading(source_start),

   154                 aSource.EndReading(source_end), calculator);

   156     uint32_t count = calculator.Size();

   158     if (count)

   159       {

   160         uint32_t old_dest_length = aDest.Length();

   162         // Grow the buffer if we need to.

   163         if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {

   164           return false;

   165         }

   167         // All ready? Time to convert

   169         ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);

   170         copy_string(aSource.BeginReading(source_start),

   171                     aSource.EndReading(source_end), converter);

   173         NS_ASSERTION(converter.Size() == count,

   174                      "Unexpected disparity between CalculateUTF8Size and "

   175                      "ConvertUTF16toUTF8");

   176       }

   178     return true;

   179   }

   181 void

   182 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )

   183 {

   184   if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) {

   185     NS_ABORT_OOM(aDest.Length() + aSource.Length());

   186   }

   187 }

   189 bool

   190 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest,

   191                    const mozilla::fallible_t& )

   192   {

   193     nsACString::const_iterator source_start, source_end;

   194     CalculateUTF8Length calculator;

   195     copy_string(aSource.BeginReading(source_start),

   196                 aSource.EndReading(source_end), calculator);

   198     uint32_t count = calculator.Length();

   200     // Avoid making the string mutable if we're appending an empty string

   201     if (count)

   202       {

   203         uint32_t old_dest_length = aDest.Length();

   205         // Grow the buffer if we need to.

   206         if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {

   207           return false;

   208         }

   210         // All ready? Time to convert

   212         ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);

   213         copy_string(aSource.BeginReading(source_start),

   214                     aSource.EndReading(source_end), converter);

   216         NS_ASSERTION(converter.ErrorEncountered() ||

   217                      converter.Length() == count,

   218                      "CalculateUTF8Length produced the wrong length");

   220         if (converter.ErrorEncountered())

   221           {

   222             NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");

   223             aDest.SetLength(old_dest_length);

   224           }

   225       }

   227     return true;

   228   }

   230 void

   231 AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest )

   232   {

   233     if (aSource) {

   234       AppendUTF16toUTF8(nsDependentString(aSource), aDest);

   235     }

   236   }

   238 void

   239 AppendUTF8toUTF16( const char* aSource, nsAString& aDest )

   240   {

   241     if (aSource) {

   242       AppendUTF8toUTF16(nsDependentCString(aSource), aDest);

   243     }

   244   }

   247   /**

   248    * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).

   249    *

   250    * @param aSource an string you will eventually be making a copy of

   251    * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.

   252    *

   253    */

   254 template <class FromStringT, class ToCharT>

   255 inline

   256 ToCharT*

   257 AllocateStringCopy( const FromStringT& aSource, ToCharT* )

   258   {

   259     return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));

   260   }

   263 char*

   264 ToNewCString( const nsAString& aSource )

   265   {

   266     char* result = AllocateStringCopy(aSource, (char*)0);

   267     if (!result)

   268       return nullptr;

   270     nsAString::const_iterator fromBegin, fromEnd;

   271     LossyConvertEncoding16to8 converter(result);

   272     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();

   273     return result;

   274   }

   276 char*

   277 ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count )

   278   {

   279     nsAString::const_iterator start, end;

   280     CalculateUTF8Size calculator;

   281     copy_string(aSource.BeginReading(start), aSource.EndReading(end),

   282                 calculator);

   284     if (aUTF8Count)

   285       *aUTF8Count = calculator.Size();

   287     char *result = static_cast<char*>

   288                               (nsMemory::Alloc(calculator.Size() + 1));

   289     if (!result)

   290       return nullptr;

   292     ConvertUTF16toUTF8 converter(result);

   293     copy_string(aSource.BeginReading(start), aSource.EndReading(end),

   294                 converter).write_terminator();

   295     NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");

   297     return result;

   298   }

   300 char*

   301 ToNewCString( const nsACString& aSource )

   302   {

   303     // no conversion needed, just allocate a buffer of the correct length and copy into it

   305     char* result = AllocateStringCopy(aSource, (char*)0);

   306     if (!result)

   307       return nullptr;

   309     nsACString::const_iterator fromBegin, fromEnd;

   310     char* toBegin = result;

   311     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);

   312     return result;

   313   }

   315 char16_t*

   316 ToNewUnicode( const nsAString& aSource )

   317   {

   318     // no conversion needed, just allocate a buffer of the correct length and copy into it

   320     char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);

   321     if (!result)

   322       return nullptr;

   324     nsAString::const_iterator fromBegin, fromEnd;

   325     char16_t* toBegin = result;

   326     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char16_t(0);

   327     return result;

   328   }

   330 char16_t*

   331 ToNewUnicode( const nsACString& aSource )

   332   {

   333     char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);

   334     if (!result)

   335       return nullptr;

   337     nsACString::const_iterator fromBegin, fromEnd;

   338     LossyConvertEncoding8to16 converter(result);

   339     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();

   340     return result;

   341   }

   343 uint32_t

   344 CalcUTF8ToUnicodeLength( const nsACString& aSource)

   345   {

   346     nsACString::const_iterator start, end;

   347     CalculateUTF8Length calculator;

   348     copy_string(aSource.BeginReading(start), aSource.EndReading(end),

   349                 calculator);

   350     return calculator.Length();

   351   }

   353 char16_t*

   354 UTF8ToUnicodeBuffer( const nsACString& aSource, char16_t* aBuffer, uint32_t *aUTF16Count )

   355   {

   356     nsACString::const_iterator start, end;

   357     ConvertUTF8toUTF16 converter(aBuffer);

   358     copy_string(aSource.BeginReading(start),

   359                 aSource.EndReading(end),

   360                 converter).write_terminator();

   361     if (aUTF16Count)

   362       *aUTF16Count = converter.Length();

   363     return aBuffer;

   364   }

   366 char16_t*

   367 UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count )

   368   {

   369     const uint32_t length = CalcUTF8ToUnicodeLength(aSource);

   370     const size_t buffer_size = (length + 1) * sizeof(char16_t);

   371     char16_t *buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size));

   372     if (!buffer)

   373       return nullptr;

   375     uint32_t copied;

   376     UTF8ToUnicodeBuffer(aSource, buffer, &copied);

   377     NS_ASSERTION(length == copied, "length mismatch");

   379     if (aUTF16Count)

   380       *aUTF16Count = copied;

   381     return buffer;

   382   }

   384 char16_t*

   385 CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength )

   386   {

   387     nsAString::const_iterator fromBegin, fromEnd;

   388     char16_t* toBegin = aDest;

   389     copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin);

   390     return aDest;

   391   }

   393 void

   394 CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,

   395                const nsAString::const_iterator& aSrcEnd,

   396                nsAString& aDest )

   397   {

   398     nsAString::iterator writer;

   399     aDest.SetLength(Distance(aSrcStart, aSrcEnd));

   401     aDest.BeginWriting(writer);

   402     nsAString::const_iterator fromBegin(aSrcStart);

   404     copy_string(fromBegin, aSrcEnd, writer);

   405   }

   407 void

   408 AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,

   409                  const nsAString::const_iterator& aSrcEnd,

   410                  nsAString& aDest )

   411   {

   412     nsAString::iterator writer;

   413     uint32_t oldLength = aDest.Length();

   414     aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));

   416     aDest.BeginWriting(writer).advance(oldLength);

   417     nsAString::const_iterator fromBegin(aSrcStart);

   419     copy_string(fromBegin, aSrcEnd, writer);

   420   }

   422 bool

   423 IsASCII( const nsAString& aString )

   424   {

   425     static const char16_t NOT_ASCII = char16_t(~0x007F);

   428     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character

   430     nsAString::const_iterator iter, done_reading;

   431     aString.BeginReading(iter);

   432     aString.EndReading(done_reading);

   434     const char16_t* c = iter.get();

   435     const char16_t* end = done_reading.get();

   437     while ( c < end )

   438       {

   439         if ( *c++ & NOT_ASCII )

   440           return false;

   441       }

   443     return true;

   444   }

   446 bool

   447 IsASCII( const nsACString& aString )

   448   {

   449     static const char NOT_ASCII = char(~0x7F);

   452     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character

   454     nsACString::const_iterator iter, done_reading;

   455     aString.BeginReading(iter);

   456     aString.EndReading(done_reading);

   458     const char* c = iter.get();

   459     const char* end = done_reading.get();

   461     while ( c < end )

   462       {

   463         if ( *c++ & NOT_ASCII )

   464           return false;

   465       }

   467     return true;

   468   }

   /**
    * Checks whether |aString| is a well-formed UTF-8 byte sequence.
    *
    * The scan is a small state machine: |state| holds how many continuation
    * bytes the current multi-byte sequence still needs. Lead bytes arm the
    * |overlong| / |surrogate| / |nonchar| flags, and the continuation-byte
    * loop rejects the corresponding byte ranges.
    *
    * @param aString        the bytes to validate
    * @param aRejectNonChar when true, sequences matching the non-character
    *                       patterns described in the comments below also make
    *                       the function return false
    * @return true if the whole string was accepted and did not end in the
    *         middle of a multi-byte sequence
    */
   470 bool

   471 IsUTF8( const nsACString& aString, bool aRejectNonChar )

   472   {

   473     nsReadingIterator<char> done_reading;

   474     aString.EndReading(done_reading);

   // Continuation bytes still expected for the sequence being read.
   476     int32_t state = 0;

   // Range-check flags; they are cleared after the first continuation byte.
   477     bool overlong = false;

   478     bool surrogate = false;

   479     bool nonchar = false;

   480     uint16_t olupper = 0; // overlong byte upper bound.

   481     uint16_t slower = 0;  // surrogate byte lower bound.

   483     nsReadingIterator<char> iter;

   484     aString.BeginReading(iter);

   486     const char* ptr = iter.get();

   487     const char* end = done_reading.get();

   488     while ( ptr < end )

   489       {

   490         uint8_t c;

   // state == 0: we are at the start of a new character; classify its lead byte.
   492         if (0 == state)

   493           {

   494             c = *ptr++;

   496             if ( UTF8traits::isASCII(c) )

   497               continue;

   499             if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.

   500               return false;

   501             else if ( UTF8traits::is2byte(c) )

   502                 state = 1;

   503             else if ( UTF8traits::is3byte(c) )

   504               {

   505                 state = 2;

   506                 if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]

   507                   {

   508                     overlong = true;

   509                     olupper = 0x9F;

   510                   }

   511                 else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint

   512                   {

   513                     surrogate = true;

   514                     slower = 0xA0;

   515                   }

   516                 else if ( c == 0xEF ) // EF BF [BE-BF] : non-character

   517                   nonchar = true;

   518               }

   519             else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)

   520               {

   521                 state = 3;

   522                 nonchar = true;

   523                 if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}

   524                   {

   525                     overlong = true;

   526                     olupper = 0x8F;

   527                   }

   528                 else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]

   529                   {

   530                     // actually not surrogates but codepoints beyond 0x10FFFF

   531                     surrogate = true;

   532                     slower = 0x90;

   533                   }

   534               }

   535             else

   536               return false; // Not UTF-8 string

   537           }

   // Non-character tracking is dropped unless the caller asked for rejection.
   539         if (nonchar && !aRejectNonChar)

   540           nonchar = false;

   // Consume continuation bytes until the sequence is complete or input ends.
   542         while ( ptr < end && state )

   543           {

   544             c = *ptr++;

   545             --state;

   547             // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]

   548             if ( nonchar &&

   549                  ( ( !state && c < 0xBE ) ||

   550                    ( state == 1 && c != 0xBF )  ||

   551                    ( state == 2 && 0x0F != (0x0F & c) )))

   552               nonchar = false;

   554             if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) ||

   555                  ( surrogate && slower <= c ) || ( nonchar && !state ))

   556               return false; // Not UTF-8 string

   // The overlong/surrogate bounds only constrain the first continuation byte.
   558             overlong = surrogate = false;

   559           }

   560         }

   561     return !state; // state != 0 at the end indicates an invalid UTF-8 seq.

   562   }

   /**
    * Character sink that upper-cases ASCII letters in place.
    *
    * |write| treats the buffer it is handed as both input and output: the
    * const is cast away and every byte in 'a'..'z' is replaced by its
    * upper-case counterpart. All other bytes are left untouched.
    */
   class ConvertToUpperCase
   {
   public:
     typedef char value_type;

     /**
      * @param aSource       buffer to convert in place (must be writable)
      * @param aSourceLength number of bytes to process
      * @return |aSourceLength|
      */
     uint32_t
     write( const char* aSource, uint32_t aSourceLength )
     {
       char* const buffer = const_cast<char*>(aSource);
       for (uint32_t i = 0; i < aSourceLength; ++i) {
         const char ch = buffer[i];
         if (ch >= 'a' && ch <= 'z') {
           buffer[i] = ch - ('a' - 'A');
         }
       }
       return aSourceLength;
     }
   };

   587 void

   588 ToUpperCase( nsCSubstring& aCString )

   589   {

   590     ConvertToUpperCase converter;

   591     char* start;

   592     converter.write(aCString.BeginWriting(start), aCString.Length());

   593   }

   595   /**

   596    * A character sink for copying with case conversion.

   597    */

   598 class CopyToUpperCase

   599   {

   600     public:

   601       typedef char value_type;

   603       CopyToUpperCase( nsACString::iterator& aDestIter )

   604         : mIter(aDestIter)

   605         {

   606         }

   608       uint32_t

   609       write( const char* aSource, uint32_t aSourceLength )

   610         {

   611           uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);

   612           char* cp = mIter.get();

   613           const char* end = aSource + len;

   614           while (aSource != end) {

   615             char ch = *aSource;

   616             if ((ch >= 'a') && (ch <= 'z'))

   617               *cp = ch - ('a' - 'A');

   618             else

   619               *cp = ch;

   620             ++aSource;

   621             ++cp;

   622           }

   623           mIter.advance(len);

   624           return len;

   625         }

   627     protected:

   628       nsACString::iterator& mIter;

   629   };

   631 void

   632 ToUpperCase( const nsACString& aSource, nsACString& aDest )

   633   {

   634     nsACString::const_iterator fromBegin, fromEnd;

   635     nsACString::iterator toBegin;

   636     aDest.SetLength(aSource.Length());

   638     CopyToUpperCase converter(aDest.BeginWriting(toBegin));

   639     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);

   640   }

   /**
    * Character sink that lower-cases ASCII letters in place.
    *
    * |write| treats the buffer it is handed as both input and output: the
    * const is cast away and every byte in 'A'..'Z' is replaced by its
    * lower-case counterpart. All other bytes are left untouched.
    */
   class ConvertToLowerCase
   {
   public:
     typedef char value_type;

     /**
      * @param aSource       buffer to convert in place (must be writable)
      * @param aSourceLength number of bytes to process
      * @return |aSourceLength|
      */
     uint32_t
     write( const char* aSource, uint32_t aSourceLength )
     {
       char* const buffer = const_cast<char*>(aSource);
       for (uint32_t i = 0; i < aSourceLength; ++i) {
         const char ch = buffer[i];
         if (ch >= 'A' && ch <= 'Z') {
           buffer[i] = ch + ('a' - 'A');
         }
       }
       return aSourceLength;
     }
   };

   665 void

   666 ToLowerCase( nsCSubstring& aCString )

   667   {

   668     ConvertToLowerCase converter;

   669     char* start;

   670     converter.write(aCString.BeginWriting(start), aCString.Length());

   671   }

   673   /**

   674    * A character sink for copying with case conversion.

   675    */

   676 class CopyToLowerCase

   677   {

   678     public:

   679       typedef char value_type;

   681       CopyToLowerCase( nsACString::iterator& aDestIter )

   682         : mIter(aDestIter)

   683         {

   684         }

   686       uint32_t

   687       write( const char* aSource, uint32_t aSourceLength )

   688         {

   689           uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);

   690           char* cp = mIter.get();

   691           const char* end = aSource + len;

   692           while (aSource != end) {

   693             char ch = *aSource;

   694             if ((ch >= 'A') && (ch <= 'Z'))

   695               *cp = ch + ('a' - 'A');

   696             else

   697               *cp = ch;

   698             ++aSource;

   699             ++cp;

   700           }

   701           mIter.advance(len);

   702           return len;

   703         }

   705     protected:

   706       nsACString::iterator& mIter;

   707   };

   709 void

   710 ToLowerCase( const nsACString& aSource, nsACString& aDest )

   711   {

   712     nsACString::const_iterator fromBegin, fromEnd;

   713     nsACString::iterator toBegin;

   714     aDest.SetLength(aSource.Length());

   716     CopyToLowerCase converter(aDest.BeginWriting(toBegin));

   717     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);

   718   }

   720 bool

   721 ParseString(const nsACString& aSource, char aDelimiter,

   722             nsTArray<nsCString>& aArray)

   723   {

   724     nsACString::const_iterator start, end;

   725     aSource.BeginReading(start);

   726     aSource.EndReading(end);

   728     uint32_t oldLength = aArray.Length();

   730     for (;;)

   731       {

   732         nsACString::const_iterator delimiter = start;

   733         FindCharInReadable(aDelimiter, delimiter, end);

   735         if (delimiter != start)

   736           {

   737             if (!aArray.AppendElement(Substring(start, delimiter)))

   738               {

   739                 aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);

   740                 return false;

   741               }

   742           }

   744         if (delimiter == end)

   745           break;

   746         start = ++delimiter;

   747         if (start == end)

   748           break;

   749       }

   751     return true;

   752   }

   /**
    * Forward search for |aPattern| in [aSearchStart, aSearchEnd).
    *
    * |compare| is called on single characters and is treated as "mismatch"
    * when it returns non-zero.
    *
    * On success, returns true with |aSearchStart| at the first character of
    * the match and |aSearchEnd| just past it. If the range is empty on entry,
    * returns false and leaves both iterators untouched; otherwise a failed
    * search returns false with |aSearchStart| moved to |aSearchEnd|.
    */
   template <class StringT, class IteratorT, class Comparator>
   bool
   FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
   {
     // only bother searching at all if we're given a non-empty range to search
     if ( aSearchStart == aSearchEnd ) {
       return false;
     }

     IteratorT patternBegin, patternEnd;
     aPattern.BeginReading(patternBegin);
     aPattern.EndReading(patternEnd);

     // Each pass: find a candidate position, then verify the rest of the
     // pattern there.
     for (;;) {
       // Skip ahead to the next position whose character matches the first
       // pattern character.
       while ( aSearchStart != aSearchEnd &&
               compare(patternBegin.get(), aSearchStart.get(), 1, 1) ) {
         ++aSearchStart;
       }

       // Ran out of string without a candidate: no match.
       if ( aSearchStart == aSearchEnd ) {
         return false;
       }

       IteratorT patternCursor(patternBegin);
       IteratorT searchCursor(aSearchStart);

       for (;;) {
         // The first character was already compared above, so advance
         // before the next comparison.
         ++patternCursor;
         ++searchCursor;

         // Verified all the way to the end of the pattern: found it.
         if ( patternCursor == patternEnd ) {
           aSearchEnd = searchCursor; // return the exact found range through the parameters
           return true;
         }

         // Hit the end of the searched range before the end of the pattern:
         // a match is no longer possible anywhere.
         if ( searchCursor == aSearchEnd ) {
           aSearchStart = aSearchEnd;
           return false;
         }

         // Mismatch: move to the next search position and look for a new
         // candidate.
         if ( compare(patternCursor.get(), searchCursor.get(), 1, 1) ) {
           ++aSearchStart;
           break;
         }
       }
     }
   }

   /**
    * This searches the entire string from right to left, and returns the first match found, if any.
    *
    * |compare| is called on single characters and is treated as "match" when
    * it returns 0. On success, |aSearchStart| and |aSearchEnd| delimit the
    * match; on failure |aSearchStart| is set to |aSearchEnd|.
    *
    * NOTE(review): the pattern's end iterator is decremented unconditionally,
    * so this appears to assume a non-empty pattern; confirm against callers.
    */
   template <class StringT, class IteratorT, class Comparator>
   bool
   RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
   {
     IteratorT patternFirst, patternLast, candidateEnd = aSearchEnd;
     aPattern.BeginReading(patternFirst);
     aPattern.EndReading(patternLast);

     // Point to the last character in the pattern
     --patternLast;

     // Walk candidate end positions from right to left until none remain.
     while ( aSearchStart != candidateEnd ) {
       // Point to the end position of the next possible match
       --candidateEnd;

       // Only explore further when the last pattern character matches here.
       if ( compare(patternLast.get(), candidateEnd.get(), 1, 1) != 0 ) {
         continue;
       }

       IteratorT patternCursor(patternLast);
       IteratorT searchCursor(candidateEnd);

       // Verify the candidate, walking both cursors backwards.
       do {
         // Reached the first pattern character with everything matching.
         if ( patternCursor == patternFirst ) {
           aSearchStart = searchCursor;   // point to start of match
           aSearchEnd = ++candidateEnd;   // point to end of match
           return true;
         }

         // Reached the start of the searched range before the start of the
         // pattern: no match is possible.
         if ( searchCursor == aSearchStart ) {
           aSearchStart = aSearchEnd;
           return false;
         }

         // test previous character for a match
         --patternCursor;
         --searchCursor;
       } while ( compare(patternCursor.get(), searchCursor.get(), 1, 1) == 0 );
     }

     aSearchStart = aSearchEnd;
     return false;
   }

   878 bool

   879 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )

   880   {

   881     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);

   882   }

   884 bool

   885 FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)

   886   {

   887     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);

   888   }

   890 bool

   891 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )

   892   {

   893     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());

   894   }

   896 bool

   897 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)

   898   {

   899     return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);

   900   }

   902 bool

   903 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)

   904   {

   905     return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);

   906   }

   908 bool

   909 FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )

   910   {

   911     int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();

   913     const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);

   914     if ( charFoundAt ) {

   915       aSearchStart.advance( charFoundAt - aSearchStart.get() );

   916       return true;

   917     }

   919     aSearchStart.advance(fragmentLength);

   920     return false;

   921   }

   923 bool

   924 FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )

   925   {

   926     int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();

   928     const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);

   929     if ( charFoundAt ) {

   930       aSearchStart.advance( charFoundAt - aSearchStart.get() );

   931       return true;

   932     }

   934     aSearchStart.advance(fragmentLength);

   935     return false;

   936   }

   938 uint32_t

   939 CountCharInReadable( const nsAString& aStr,

   940                      char16_t aChar )

   941 {

   942   uint32_t count = 0;

   943   nsAString::const_iterator begin, end;

   945   aStr.BeginReading(begin);

   946   aStr.EndReading(end);

   948   while (begin != end) {

   949     if (*begin == aChar) {

   950       ++count;

   951     }

   952     ++begin;

   953   }

   955   return count;

   956 }

   958 uint32_t

   959 CountCharInReadable( const nsACString& aStr,

   960                      char aChar )

   961 {

   962   uint32_t count = 0;

   963   nsACString::const_iterator begin, end;

   965   aStr.BeginReading(begin);

   966   aStr.EndReading(end);

   968   while (begin != end) {

   969     if (*begin == aChar) {

   970       ++count;

   971     }

   972     ++begin;

   973   }

   975   return count;

   976 }

   978 bool

   979 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,

   980                   const nsStringComparator& aComparator )

   981   {

   982     nsAString::size_type src_len = aSource.Length(),

   983                          sub_len = aSubstring.Length();

   984     if (sub_len > src_len)

   985       return false;

   986     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);

   987   }

   989 bool

   990 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,

   991                   const nsCStringComparator& aComparator )

   992   {

   993     nsACString::size_type src_len = aSource.Length(),

   994                           sub_len = aSubstring.Length();

   995     if (sub_len > src_len)

   996       return false;

   997     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);

   998   }

  1000 bool

  1001 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,

  1002                 const nsStringComparator& aComparator )

  1003   {

  1004     nsAString::size_type src_len = aSource.Length(),

  1005                          sub_len = aSubstring.Length();

  1006     if (sub_len > src_len)

  1007       return false;

  1008     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,

  1009                                                                  aComparator);

  1010   }

  1012 bool

  1013 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,

  1014                 const nsCStringComparator& aComparator )

  1015   {

  1016     nsACString::size_type src_len = aSource.Length(),

  1017                           sub_len = aSubstring.Length();

  1018     if (sub_len > src_len)

  1019       return false;

  1020     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,

  1021                                                                  aComparator);

  1022   }

  1026 static const char16_t empty_buffer[1] = { '\0' };

  1028 const nsAFlatString&

  1029 EmptyString()

  1030   {

  1031     static const nsDependentString sEmpty(empty_buffer);

  1033     return sEmpty;

  1034   }

  1036 const nsAFlatCString&

  1037 EmptyCString()

  1038   {

  1039     static const nsDependentCString sEmpty((const char *)empty_buffer);

  1041     return sEmpty;

  1042   }

  1044 const nsAFlatString&

  1045 NullString()

  1046   {

  1047     static const nsXPIDLString sNull;

  1049     return sNull;

  1050   }

  1052 const nsAFlatCString&

  1053 NullCString()

  1054   {

  1055     static const nsXPIDLCString sNull;

  1057     return sNull;

  1058   }

  1060 int32_t

  1061 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,

  1062                    const nsASingleFragmentString& aUTF16String)

  1063   {

  1064     static const uint32_t NOT_ASCII = uint32_t(~0x7F);

  1066     const char *u8, *u8end;

  1067     aUTF8String.BeginReading(u8);

  1068     aUTF8String.EndReading(u8end);

  1070     const char16_t *u16, *u16end;

  1071     aUTF16String.BeginReading(u16);

  1072     aUTF16String.EndReading(u16end);

  1074     while (u8 != u8end && u16 != u16end)

  1075       {

  1076         // Cast away the signedness of *u8 to prevent signextension when

  1077         // converting to uint32_t

  1078         uint32_t c8_32 = (uint8_t)*u8;

  1080         if (c8_32 & NOT_ASCII)

  1081           {

  1082             bool err;

  1083             c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);

  1084             if (err)

  1085               return INT32_MIN;

  1087             uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);

  1088             // The above UTF16CharEnumerator::NextChar() calls can

  1089             // fail, but if it does for anything other than no data to

  1090             // look at (which can't happen here), it returns the

  1091             // Unicode replacement character 0xFFFD for the invalid

  1092             // data they were fed. Ignore that error and treat invalid

  1093             // UTF16 as 0xFFFD.

  1094             //

  1095             // This matches what our UTF16 to UTF8 conversion code

  1096             // does, and thus a UTF8 string that came from an invalid

  1097             // UTF16 string will compare equal to the invalid UTF16

  1098             // string it came from. Same is true for any other UTF16

  1099             // string differs only in the invalid part of the string.

  1101             if (c8_32 != c16_32)

  1102               return c8_32 < c16_32 ? -1 : 1;

  1103           }

  1104         else

  1105           {

  1106             if (c8_32 != *u16)

  1107               return c8_32 > *u16 ? 1 : -1;

  1109             ++u8;

  1110             ++u16;

  1111           }

  1112       }

  1114     if (u8 != u8end)

  1115       {

  1116         // We get to the end of the UTF16 string, but no to the end of

  1117         // the UTF8 string. The UTF8 string is longer than the UTF16

  1118         // string

  1120         return 1;

  1121       }

  1123     if (u16 != u16end)

  1124       {

  1125         // We get to the end of the UTF8 string, but no to the end of

  1126         // the UTF16 string. The UTF16 string is longer than the UTF8

  1127         // string

  1129         return -1;

  1130       }

  1132     // The two strings match.

  1134     return 0;

  1135   }

  1137 void

  1138 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)

  1139   {

  1140     NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");

  1141     if (IS_IN_BMP(aSource))

  1142       {

  1143         aDest.Append(char16_t(aSource));

  1144       }

  1145     else

  1146       {

  1147         aDest.Append(H_SURROGATE(aSource));

  1148         aDest.Append(L_SURROGATE(aSource));

  1149       }

  1150   }

The Tor Browser / file revision

xpcom/string/src/nsReadableUtils.cpp@6474c204b198

xpcom/string/src/nsReadableUtils.cpp