xpcom/string/src/nsReadableUtils.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsReadableUtils.h"
     8 #include "nsMemory.h"
     9 #include "nsString.h"
    10 #include "nsTArray.h"
    11 #include "nsUTF8Utils.h"
    13 void
    14 LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
    15   {
    16     aDest.Truncate();
    17     LossyAppendUTF16toASCII(aSource, aDest);
    18   }
    20 void
    21 CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
    22   {
    23     aDest.Truncate();
    24     AppendASCIItoUTF16(aSource, aDest);
    25   }
    27 void
    28 LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest )
    29   {
    30     aDest.Truncate();
    31     if (aSource) {
    32       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
    33     }
    34   }
    36 void
    37 CopyASCIItoUTF16( const char* aSource, nsAString& aDest )
    38   {
    39     aDest.Truncate();
    40     if (aSource) {
    41       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
    42     }
    43   }
    45 void
    46 CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
    47   {
    48     aDest.Truncate();
    49     AppendUTF16toUTF8(aSource, aDest);
    50   }
    52 void
    53 CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
    54   {
    55     aDest.Truncate();
    56     AppendUTF8toUTF16(aSource, aDest);
    57   }
    59 void
    60 CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest )
    61   {
    62     aDest.Truncate();
    63     AppendUTF16toUTF8(aSource, aDest);
    64   }
    66 void
    67 CopyUTF8toUTF16( const char* aSource, nsAString& aDest )
    68   {
    69     aDest.Truncate();
    70     AppendUTF8toUTF16(aSource, aDest);
    71   }
    73 void
    74 LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )
    75   {
    76     uint32_t old_dest_length = aDest.Length();
    77     aDest.SetLength(old_dest_length + aSource.Length());
    79     nsAString::const_iterator fromBegin, fromEnd;
    81     nsACString::iterator dest;
    82     aDest.BeginWriting(dest);
    84     dest.advance(old_dest_length);
    86     // right now, this won't work on multi-fragment destinations
    87     LossyConvertEncoding16to8 converter(dest.get());
    89     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
    90   }
    92 void
    93 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
    94   {
    95     if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible_t())) {
    96       NS_ABORT_OOM(aDest.Length() + aSource.Length());
    97     }
    98   }
   100 bool
   101 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest,
   102                     const mozilla::fallible_t& )
   103   {
   104     uint32_t old_dest_length = aDest.Length();
   105     if (!aDest.SetLength(old_dest_length + aSource.Length(), mozilla::fallible_t())) {
   106       return false;
   107     }
   109     nsACString::const_iterator fromBegin, fromEnd;
   111     nsAString::iterator dest;
   112     aDest.BeginWriting(dest);
   114     dest.advance(old_dest_length);
   116       // right now, this won't work on multi-fragment destinations
   117     LossyConvertEncoding8to16 converter(dest.get());
   119     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
   120     return true;
   121   }
   123 void
   124 LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest )
   125   {
   126     if (aSource) {
   127       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
   128     }
   129   }
   131 void
   132 AppendASCIItoUTF16( const char* aSource, nsAString& aDest )
   133   {
   134     if (aSource) {
   135       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
   136     }
   137   }
   139 void
   140 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
   141 {
   142   if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible_t())) {
   143     NS_ABORT_OOM(aDest.Length() + aSource.Length());
   144   }
   145 }
   147 bool
   148 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest,
   149                    const mozilla::fallible_t& )
   150   {
   151     nsAString::const_iterator source_start, source_end;
   152     CalculateUTF8Size calculator;
   153     copy_string(aSource.BeginReading(source_start),
   154                 aSource.EndReading(source_end), calculator);
   156     uint32_t count = calculator.Size();
   158     if (count)
   159       {
   160         uint32_t old_dest_length = aDest.Length();
   162         // Grow the buffer if we need to.
   163         if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {
   164           return false;
   165         }
   167         // All ready? Time to convert
   169         ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
   170         copy_string(aSource.BeginReading(source_start),
   171                     aSource.EndReading(source_end), converter);
   173         NS_ASSERTION(converter.Size() == count,
   174                      "Unexpected disparity between CalculateUTF8Size and "
   175                      "ConvertUTF16toUTF8");
   176       }
   178     return true;
   179   }
   181 void
   182 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
   183 {
   184   if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible_t())) {
   185     NS_ABORT_OOM(aDest.Length() + aSource.Length());
   186   }
   187 }
   189 bool
   190 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest,
   191                    const mozilla::fallible_t& )
   192   {
   193     nsACString::const_iterator source_start, source_end;
   194     CalculateUTF8Length calculator;
   195     copy_string(aSource.BeginReading(source_start),
   196                 aSource.EndReading(source_end), calculator);
   198     uint32_t count = calculator.Length();
   200     // Avoid making the string mutable if we're appending an empty string
   201     if (count)
   202       {
   203         uint32_t old_dest_length = aDest.Length();
   205         // Grow the buffer if we need to.
   206         if (!aDest.SetLength(old_dest_length + count, mozilla::fallible_t())) {
   207           return false;
   208         }
   210         // All ready? Time to convert
   212         ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
   213         copy_string(aSource.BeginReading(source_start),
   214                     aSource.EndReading(source_end), converter);
   216         NS_ASSERTION(converter.ErrorEncountered() ||
   217                      converter.Length() == count,
   218                      "CalculateUTF8Length produced the wrong length");
   220         if (converter.ErrorEncountered())
   221           {
   222             NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
   223             aDest.SetLength(old_dest_length);
   224           }
   225       }
   227     return true;
   228   }
   230 void
   231 AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest )
   232   {
   233     if (aSource) {
   234       AppendUTF16toUTF8(nsDependentString(aSource), aDest);
   235     }
   236   }
   238 void
   239 AppendUTF8toUTF16( const char* aSource, nsAString& aDest )
   240   {
   241     if (aSource) {
   242       AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
   243     }
   244   }
   247   /**
   248    * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
   249    *
   250    * @param aSource an string you will eventually be making a copy of
   251    * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
   252    *
   253    */
   254 template <class FromStringT, class ToCharT>
   255 inline
   256 ToCharT*
   257 AllocateStringCopy( const FromStringT& aSource, ToCharT* )
   258   {
   259     return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));
   260   }
   263 char*
   264 ToNewCString( const nsAString& aSource )
   265   {
   266     char* result = AllocateStringCopy(aSource, (char*)0);
   267     if (!result)
   268       return nullptr;
   270     nsAString::const_iterator fromBegin, fromEnd;
   271     LossyConvertEncoding16to8 converter(result);
   272     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
   273     return result;
   274   }
   276 char*
   277 ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count )
   278   {
   279     nsAString::const_iterator start, end;
   280     CalculateUTF8Size calculator;
   281     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
   282                 calculator);
   284     if (aUTF8Count)
   285       *aUTF8Count = calculator.Size();
   287     char *result = static_cast<char*>
   288                               (nsMemory::Alloc(calculator.Size() + 1));
   289     if (!result)
   290       return nullptr;
   292     ConvertUTF16toUTF8 converter(result);
   293     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
   294                 converter).write_terminator();
   295     NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
   297     return result;
   298   }
   300 char*
   301 ToNewCString( const nsACString& aSource )
   302   {
   303     // no conversion needed, just allocate a buffer of the correct length and copy into it
   305     char* result = AllocateStringCopy(aSource, (char*)0);
   306     if (!result)
   307       return nullptr;
   309     nsACString::const_iterator fromBegin, fromEnd;
   310     char* toBegin = result;
   311     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);
   312     return result;
   313   }
   315 char16_t*
   316 ToNewUnicode( const nsAString& aSource )
   317   {
   318     // no conversion needed, just allocate a buffer of the correct length and copy into it
   320     char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
   321     if (!result)
   322       return nullptr;
   324     nsAString::const_iterator fromBegin, fromEnd;
   325     char16_t* toBegin = result;
   326     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char16_t(0);
   327     return result;
   328   }
   330 char16_t*
   331 ToNewUnicode( const nsACString& aSource )
   332   {
   333     char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
   334     if (!result)
   335       return nullptr;
   337     nsACString::const_iterator fromBegin, fromEnd;
   338     LossyConvertEncoding8to16 converter(result);
   339     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
   340     return result;
   341   }
   343 uint32_t
   344 CalcUTF8ToUnicodeLength( const nsACString& aSource)
   345   {
   346     nsACString::const_iterator start, end;
   347     CalculateUTF8Length calculator;
   348     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
   349                 calculator);
   350     return calculator.Length();
   351   }
   353 char16_t*
   354 UTF8ToUnicodeBuffer( const nsACString& aSource, char16_t* aBuffer, uint32_t *aUTF16Count )
   355   {
   356     nsACString::const_iterator start, end;
   357     ConvertUTF8toUTF16 converter(aBuffer);
   358     copy_string(aSource.BeginReading(start),
   359                 aSource.EndReading(end),
   360                 converter).write_terminator();
   361     if (aUTF16Count)
   362       *aUTF16Count = converter.Length();
   363     return aBuffer;
   364   }
   366 char16_t*
   367 UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count )
   368   {
   369     const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
   370     const size_t buffer_size = (length + 1) * sizeof(char16_t);
   371     char16_t *buffer = static_cast<char16_t*>(nsMemory::Alloc(buffer_size));
   372     if (!buffer)
   373       return nullptr;
   375     uint32_t copied;
   376     UTF8ToUnicodeBuffer(aSource, buffer, &copied);
   377     NS_ASSERTION(length == copied, "length mismatch");
   379     if (aUTF16Count)
   380       *aUTF16Count = copied;
   381     return buffer;
   382   }
   384 char16_t*
   385 CopyUnicodeTo( const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, uint32_t aLength )
   386   {
   387     nsAString::const_iterator fromBegin, fromEnd;
   388     char16_t* toBegin = aDest;    
   389     copy_string(aSource.BeginReading(fromBegin).advance( int32_t(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( int32_t(aSrcOffset+aLength) ), toBegin);
   390     return aDest;
   391   }
   393 void
   394 CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
   395                const nsAString::const_iterator& aSrcEnd,
   396                nsAString& aDest )
   397   {
   398     nsAString::iterator writer;
   399     aDest.SetLength(Distance(aSrcStart, aSrcEnd));
   401     aDest.BeginWriting(writer);
   402     nsAString::const_iterator fromBegin(aSrcStart);
   404     copy_string(fromBegin, aSrcEnd, writer);
   405   }
   407 void
   408 AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
   409                  const nsAString::const_iterator& aSrcEnd,
   410                  nsAString& aDest )
   411   {
   412     nsAString::iterator writer;
   413     uint32_t oldLength = aDest.Length();
   414     aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
   416     aDest.BeginWriting(writer).advance(oldLength);
   417     nsAString::const_iterator fromBegin(aSrcStart);
   419     copy_string(fromBegin, aSrcEnd, writer);
   420   }
   422 bool
   423 IsASCII( const nsAString& aString )
   424   {
   425     static const char16_t NOT_ASCII = char16_t(~0x007F);
   428     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
   430     nsAString::const_iterator iter, done_reading;
   431     aString.BeginReading(iter);
   432     aString.EndReading(done_reading);
   434     const char16_t* c = iter.get();
   435     const char16_t* end = done_reading.get();
   437     while ( c < end )
   438       {
   439         if ( *c++ & NOT_ASCII )
   440           return false;
   441       }
   443     return true;
   444   }
   446 bool
   447 IsASCII( const nsACString& aString )
   448   {
   449     static const char NOT_ASCII = char(~0x7F);
   452     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
   454     nsACString::const_iterator iter, done_reading;
   455     aString.BeginReading(iter);
   456     aString.EndReading(done_reading);
   458     const char* c = iter.get();
   459     const char* end = done_reading.get();
   461     while ( c < end )
   462       {
   463         if ( *c++ & NOT_ASCII )
   464           return false;
   465       }
   467     return true;
   468   }
   470 bool
   471 IsUTF8( const nsACString& aString, bool aRejectNonChar )
   472   {
   473     nsReadingIterator<char> done_reading;
   474     aString.EndReading(done_reading);
   476     int32_t state = 0;
   477     bool overlong = false;
   478     bool surrogate = false;
   479     bool nonchar = false;
   480     uint16_t olupper = 0; // overlong byte upper bound.
   481     uint16_t slower = 0;  // surrogate byte lower bound.
   483     nsReadingIterator<char> iter;
   484     aString.BeginReading(iter);
   486     const char* ptr = iter.get();
   487     const char* end = done_reading.get();
   488     while ( ptr < end )
   489       {
   490         uint8_t c;
   492         if (0 == state)
   493           {
   494             c = *ptr++;
   496             if ( UTF8traits::isASCII(c) ) 
   497               continue;
   499             if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
   500               return false;
   501             else if ( UTF8traits::is2byte(c) ) 
   502                 state = 1;
   503             else if ( UTF8traits::is3byte(c) ) 
   504               {
   505                 state = 2;
   506                 if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF] 
   507                   {
   508                     overlong = true;
   509                     olupper = 0x9F;
   510                   }
   511                 else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
   512                   {
   513                     surrogate = true;
   514                     slower = 0xA0;
   515                   }
   516                 else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
   517                   nonchar = true;
   518               }
   519             else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
   520               {
   521                 state = 3;
   522                 nonchar = true;
   523                 if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
   524                   {
   525                     overlong = true;
   526                     olupper = 0x8F;
   527                   }
   528                 else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF] 
   529                   {
   530                     // actually not surrogates but codepoints beyond 0x10FFFF
   531                     surrogate = true;
   532                     slower = 0x90;
   533                   }
   534               }
   535             else
   536               return false; // Not UTF-8 string
   537           }
   539         if (nonchar && !aRejectNonChar)
   540           nonchar = false;
   542         while ( ptr < end && state )
   543           {
   544             c = *ptr++;
   545             --state;
   547             // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
   548             if ( nonchar &&  
   549                  ( ( !state && c < 0xBE ) ||
   550                    ( state == 1 && c != 0xBF )  ||
   551                    ( state == 2 && 0x0F != (0x0F & c) )))
   552               nonchar = false;
   554             if ( !UTF8traits::isInSeq(c) || ( overlong && c <= olupper ) || 
   555                  ( surrogate && slower <= c ) || ( nonchar && !state ))
   556               return false; // Not UTF-8 string
   558             overlong = surrogate = false;
   559           }
   560         }
   561     return !state; // state != 0 at the end indicates an invalid UTF-8 seq. 
   562   }
   /**
    * A character sink for in-place case conversion: every 'a'..'z' in the
    * buffer handed to write() is replaced by its upper-case counterpart.
    * The const qualifier on the incoming pointer is cast away so the
    * buffer can be modified where it lives.
    */
   class ConvertToUpperCase
   {
   public:
     typedef char value_type;

     /**
      * Upper-cases |aSourceLength| characters starting at |aSource|.
      * @return |aSourceLength|
      */
     uint32_t
     write(const char* aSource, uint32_t aSourceLength)
     {
       char* const chars = const_cast<char*>(aSource);
       for (uint32_t i = 0; i != aSourceLength; ++i) {
         const char ch = chars[i];
         if (ch >= 'a' && ch <= 'z') {
           chars[i] = ch - ('a' - 'A');
         }
       }
       return aSourceLength;
     }
   };
   587 void
   588 ToUpperCase( nsCSubstring& aCString )
   589   {
   590     ConvertToUpperCase converter;
   591     char* start;
   592     converter.write(aCString.BeginWriting(start), aCString.Length());
   593   }
   595   /**
   596    * A character sink for copying with case conversion.
   597    */
   598 class CopyToUpperCase
   599   {
   600     public:
   601       typedef char value_type;
   603       CopyToUpperCase( nsACString::iterator& aDestIter )
   604         : mIter(aDestIter)
   605         {
   606         }
   608       uint32_t
   609       write( const char* aSource, uint32_t aSourceLength )
   610         {
   611           uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);
   612           char* cp = mIter.get();
   613           const char* end = aSource + len;
   614           while (aSource != end) {
   615             char ch = *aSource;
   616             if ((ch >= 'a') && (ch <= 'z'))
   617               *cp = ch - ('a' - 'A');
   618             else
   619               *cp = ch;
   620             ++aSource;
   621             ++cp;
   622           }
   623           mIter.advance(len);
   624           return len;
   625         }
   627     protected:
   628       nsACString::iterator& mIter;
   629   };
   631 void
   632 ToUpperCase( const nsACString& aSource, nsACString& aDest )
   633   {
   634     nsACString::const_iterator fromBegin, fromEnd;
   635     nsACString::iterator toBegin;
   636     aDest.SetLength(aSource.Length());
   638     CopyToUpperCase converter(aDest.BeginWriting(toBegin));
   639     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
   640   }
   /**
    * A character sink for in-place case conversion: every 'A'..'Z' in the
    * buffer handed to write() is replaced by its lower-case counterpart.
    * The const qualifier on the incoming pointer is cast away so the
    * buffer can be modified where it lives.
    */
   class ConvertToLowerCase
   {
   public:
     typedef char value_type;

     /**
      * Lower-cases |aSourceLength| characters starting at |aSource|.
      * @return |aSourceLength|
      */
     uint32_t
     write(const char* aSource, uint32_t aSourceLength)
     {
       char* const chars = const_cast<char*>(aSource);
       for (uint32_t i = 0; i != aSourceLength; ++i) {
         const char ch = chars[i];
         if (ch >= 'A' && ch <= 'Z') {
           chars[i] = ch + ('a' - 'A');
         }
       }
       return aSourceLength;
     }
   };
   665 void
   666 ToLowerCase( nsCSubstring& aCString )
   667   {
   668     ConvertToLowerCase converter;
   669     char* start;
   670     converter.write(aCString.BeginWriting(start), aCString.Length());
   671   }
   673   /**
   674    * A character sink for copying with case conversion.
   675    */
   676 class CopyToLowerCase
   677   {
   678     public:
   679       typedef char value_type;
   681       CopyToLowerCase( nsACString::iterator& aDestIter )
   682         : mIter(aDestIter)
   683         {
   684         }
   686       uint32_t
   687       write( const char* aSource, uint32_t aSourceLength )
   688         {
   689           uint32_t len = XPCOM_MIN(uint32_t(mIter.size_forward()), aSourceLength);
   690           char* cp = mIter.get();
   691           const char* end = aSource + len;
   692           while (aSource != end) {
   693             char ch = *aSource;
   694             if ((ch >= 'A') && (ch <= 'Z'))
   695               *cp = ch + ('a' - 'A');
   696             else
   697               *cp = ch;
   698             ++aSource;
   699             ++cp;
   700           }
   701           mIter.advance(len);
   702           return len;
   703         }
   705     protected:
   706       nsACString::iterator& mIter;
   707   };
   709 void
   710 ToLowerCase( const nsACString& aSource, nsACString& aDest )
   711   {
   712     nsACString::const_iterator fromBegin, fromEnd;
   713     nsACString::iterator toBegin;
   714     aDest.SetLength(aSource.Length());
   716     CopyToLowerCase converter(aDest.BeginWriting(toBegin));
   717     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
   718   }
   720 bool
   721 ParseString(const nsACString& aSource, char aDelimiter, 
   722             nsTArray<nsCString>& aArray)
   723   {
   724     nsACString::const_iterator start, end;
   725     aSource.BeginReading(start);
   726     aSource.EndReading(end);
   728     uint32_t oldLength = aArray.Length();
   730     for (;;)
   731       {
   732         nsACString::const_iterator delimiter = start;
   733         FindCharInReadable(aDelimiter, delimiter, end);
   735         if (delimiter != start)
   736           {
   737             if (!aArray.AppendElement(Substring(start, delimiter)))
   738               {
   739                 aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
   740                 return false;
   741               }
   742           }
   744         if (delimiter == end)
   745           break;
   746         start = ++delimiter;
   747         if (start == end)
   748           break;
   749       }
   751     return true;
   752   }
   /**
    * Forward search for |aPattern| inside [aSearchStart, aSearchEnd).
    *
    * |compare| is called on one character at a time and is treated as
    * "mismatch" when it returns non-zero.
    *
    * On success, true is returned and the two iterators are narrowed to
    * delimit the match. On failure, false is returned and |aSearchStart|
    * equals |aSearchEnd|.
    */
   template <class StringT, class IteratorT, class Comparator>
   bool
   FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                       IteratorT& aSearchEnd, const Comparator& compare)
   {
     // only bother searching at all if we're given a non-empty range
     if (aSearchStart == aSearchEnd) {
       return false;
     }

     IteratorT patternBegin, patternEnd;
     aPattern.BeginReading(patternBegin);
     aPattern.EndReading(patternEnd);

     // keep searching till we find it or run out of string to search
     for (;;) {
       // skip ahead to the next position whose character matches the first
       // character of the pattern
       while (aSearchStart != aSearchEnd &&
              compare(patternBegin.get(), aSearchStart.get(), 1, 1)) {
         ++aSearchStart;
       }

       // out of string: no match
       if (aSearchStart == aSearchEnd) {
         return false;
       }

       // candidate found; verify the rest of the pattern from here
       IteratorT patternCursor(patternBegin);
       IteratorT textCursor(aSearchStart);

       for (;;) {
         // the first character was already compared above, so advance
         // before the next comparison
         ++patternCursor;
         ++textCursor;

         // verified all the way to the end of the pattern: found it
         if (patternCursor == patternEnd) {
           aSearchEnd = textCursor; // return the exact found range
           return true;
         }

         // text exhausted before the pattern: nothing more can match; the
         // outer loop will observe the empty range and report failure
         if (textCursor == aSearchEnd) {
           aSearchStart = aSearchEnd;
           break;
         }

         // mismatch: resume scanning from the next search position
         if (compare(patternCursor.get(), textCursor.get(), 1, 1)) {
           ++aSearchStart;
           break;
         }
       }
     }
   }
   /**
    * This searches the entire string from right to left, and returns the
    * first match found, if any.
    *
    * |compare| is called on one character at a time and is treated as
    * "match" when it returns 0.
    *
    * On success, true is returned and the two iterators are narrowed to
    * delimit the match. On failure, false is returned and |aSearchStart|
    * is set to |aSearchEnd|.
    *
    * NOTE(review): the pattern's end iterator is decremented
    * unconditionally, so an empty |aPattern| looks unsupported here --
    * confirm against callers.
    */
   template <class StringT, class IteratorT, class Comparator>
   bool
   RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                        IteratorT& aSearchEnd, const Comparator& compare)
   {
     IteratorT patternFirst, patternLast, candidateEnd = aSearchEnd;
     aPattern.BeginReading(patternFirst);
     aPattern.EndReading(patternLast);

     // Point to the last character in the pattern
     --patternLast;

     // keep searching till we run out of string to search
     while (aSearchStart != candidateEnd) {
       // Point to the end position of the next possible match
       --candidateEnd;

       // Only explore further if the last character matches
       if (compare(patternLast.get(), candidateEnd.get(), 1, 1) != 0) {
         continue;
       }

       // We're at a potential match, let's see if we really hit one
       IteratorT patternCursor(patternLast);
       IteratorT textCursor(candidateEnd);

       do {
         // walked back to the first pattern character: found it
         if (patternCursor == patternFirst) {
           aSearchStart = textCursor;    // point to start of match
           aSearchEnd = ++candidateEnd;  // point to end of match
           return true;
         }

         // hit the start of the searched range before the start of the
         // pattern: we'll never find what we're looking for
         if (textCursor == aSearchStart) {
           aSearchStart = aSearchEnd;
           return false;
         }

         // test previous character for a match
         --patternCursor;
         --textCursor;
       } while (compare(patternCursor.get(), textCursor.get(), 1, 1) == 0);
     }

     aSearchStart = aSearchEnd;
     return false;
   }
   878 bool
   879 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
   880   {
   881     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   882   }
   884 bool
   885 FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
   886   {
   887     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   888   }
   890 bool
   891 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
   892   {
   893     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
   894   }
   896 bool
   897 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
   898   {
   899     return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   900   }
   902 bool
   903 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
   904   {
   905     return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
   906   }
   908 bool
   909 FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
   910   {
   911     int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
   913     const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
   914     if ( charFoundAt ) {
   915       aSearchStart.advance( charFoundAt - aSearchStart.get() );
   916       return true;
   917     }
   919     aSearchStart.advance(fragmentLength);
   920     return false;
   921   }
   923 bool
   924 FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
   925   {
   926     int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
   928     const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
   929     if ( charFoundAt ) {
   930       aSearchStart.advance( charFoundAt - aSearchStart.get() );
   931       return true;
   932     }
   934     aSearchStart.advance(fragmentLength);
   935     return false;
   936   }
   938 uint32_t
   939 CountCharInReadable( const nsAString& aStr,
   940                      char16_t aChar )
   941 {
   942   uint32_t count = 0;
   943   nsAString::const_iterator begin, end;
   945   aStr.BeginReading(begin);
   946   aStr.EndReading(end);
   948   while (begin != end) {
   949     if (*begin == aChar) {
   950       ++count;
   951     }
   952     ++begin;
   953   }
   955   return count;
   956 }
   958 uint32_t
   959 CountCharInReadable( const nsACString& aStr,
   960                      char aChar )
   961 {
   962   uint32_t count = 0;
   963   nsACString::const_iterator begin, end;
   965   aStr.BeginReading(begin);
   966   aStr.EndReading(end);
   968   while (begin != end) {
   969     if (*begin == aChar) {
   970       ++count;
   971     }
   972     ++begin;
   973   }
   975   return count;
   976 }
   978 bool
   979 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
   980                   const nsStringComparator& aComparator )
   981   {
   982     nsAString::size_type src_len = aSource.Length(),
   983                          sub_len = aSubstring.Length();
   984     if (sub_len > src_len)
   985       return false;
   986     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
   987   }
   989 bool
   990 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
   991                   const nsCStringComparator& aComparator )
   992   {
   993     nsACString::size_type src_len = aSource.Length(),
   994                           sub_len = aSubstring.Length();
   995     if (sub_len > src_len)
   996       return false;
   997     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
   998   }
  1000 bool
  1001 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
  1002                 const nsStringComparator& aComparator )
  1004     nsAString::size_type src_len = aSource.Length(),
  1005                          sub_len = aSubstring.Length();
  1006     if (sub_len > src_len)
  1007       return false;
  1008     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
  1009                                                                  aComparator);
  1012 bool
  1013 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
  1014                 const nsCStringComparator& aComparator )
  1016     nsACString::size_type src_len = aSource.Length(),
  1017                           sub_len = aSubstring.Length();
  1018     if (sub_len > src_len)
  1019       return false;
  1020     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
  1021                                                                  aComparator);
  1026 static const char16_t empty_buffer[1] = { '\0' };
  1028 const nsAFlatString&
  1029 EmptyString()
  1031     static const nsDependentString sEmpty(empty_buffer);
  1033     return sEmpty;
  1036 const nsAFlatCString&
  1037 EmptyCString()
  1039     static const nsDependentCString sEmpty((const char *)empty_buffer);
  1041     return sEmpty;
  1044 const nsAFlatString&
  1045 NullString()
  1047     static const nsXPIDLString sNull;
  1049     return sNull;
  1052 const nsAFlatCString&
  1053 NullCString()
  1055     static const nsXPIDLCString sNull;
  1057     return sNull;
  1060 int32_t
  1061 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
  1062                    const nsASingleFragmentString& aUTF16String)
  1064     static const uint32_t NOT_ASCII = uint32_t(~0x7F);
  1066     const char *u8, *u8end;
  1067     aUTF8String.BeginReading(u8);
  1068     aUTF8String.EndReading(u8end);
  1070     const char16_t *u16, *u16end;
  1071     aUTF16String.BeginReading(u16);
  1072     aUTF16String.EndReading(u16end);
  1074     while (u8 != u8end && u16 != u16end)
  1076         // Cast away the signedness of *u8 to prevent signextension when
  1077         // converting to uint32_t
  1078         uint32_t c8_32 = (uint8_t)*u8;
  1080         if (c8_32 & NOT_ASCII)
  1082             bool err;
  1083             c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
  1084             if (err)
  1085               return INT32_MIN;
  1087             uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
  1088             // The above UTF16CharEnumerator::NextChar() calls can
  1089             // fail, but if it does for anything other than no data to
  1090             // look at (which can't happen here), it returns the
  1091             // Unicode replacement character 0xFFFD for the invalid
  1092             // data they were fed. Ignore that error and treat invalid
  1093             // UTF16 as 0xFFFD.
  1094             //
  1095             // This matches what our UTF16 to UTF8 conversion code
  1096             // does, and thus a UTF8 string that came from an invalid
  1097             // UTF16 string will compare equal to the invalid UTF16
  1098             // string it came from. Same is true for any other UTF16
  1099             // string differs only in the invalid part of the string.
  1101             if (c8_32 != c16_32)
  1102               return c8_32 < c16_32 ? -1 : 1;
  1104         else
  1106             if (c8_32 != *u16)
  1107               return c8_32 > *u16 ? 1 : -1;
  1109             ++u8;
  1110             ++u16;
  1114     if (u8 != u8end)
  1116         // We get to the end of the UTF16 string, but no to the end of
  1117         // the UTF8 string. The UTF8 string is longer than the UTF16
  1118         // string
  1120         return 1;
  1123     if (u16 != u16end)
  1125         // We get to the end of the UTF8 string, but no to the end of
  1126         // the UTF16 string. The UTF16 string is longer than the UTF8
  1127         // string
  1129         return -1;
  1132     // The two strings match.
  1134     return 0;
  1137 void
  1138 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
  1140     NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
  1141     if (IS_IN_BMP(aSource))
  1143         aDest.Append(char16_t(aSource));
  1145     else
  1147         aDest.Append(H_SURROGATE(aSource));
  1148         aDest.Append(L_SURROGATE(aSource));

mercurial