The Tor Browser: parser/htmlparser/src/nsScanner.cpp@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

     2 /* vim: set ts=2 sw=2 et tw=78: */

     3 /* This Source Code Form is subject to the terms of the Mozilla Public

     4  * License, v. 2.0. If a copy of the MPL was not distributed with this

     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     7 //#define __INCREMENTAL 1

     9 #include "mozilla/DebugOnly.h"

    11 #include "nsScanner.h"

    12 #include "nsDebug.h"

    13 #include "nsReadableUtils.h"

    14 #include "nsIInputStream.h"

    15 #include "nsIFile.h"

    16 #include "nsNetUtil.h"

    17 #include "nsUTF8Utils.h" // for LossyConvertEncoding

    18 #include "nsCRT.h"

    19 #include "nsParser.h"

    20 #include "nsCharsetSource.h"

    22 #include "mozilla/dom/EncodingUtils.h"

    24 using mozilla::dom::EncodingUtils;

    26 // We replace NUL characters with this character.

    27 static char16_t sInvalid = UCS2_REPLACEMENT_CHAR;

    29 nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) :

    30   mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set

    31 {

    32   // Build filter that will be used to filter out characters with

    33   // bits that none of the terminal chars have. This works very well

    34   // because terminal chars often have only the last 4-6 bits set and

    35   // normal ascii letters have bit 7 set. Other letters have even higher

    36   // bits set.

    38   // Calculate filter

    39   const char16_t *current = aTerminateChars;

    40   char16_t terminalChar = *current;

    41   while (terminalChar) {

    42     mFilter &= ~terminalChar;

    43     ++current;

    44     terminalChar = *current;

    45   }

    46 }

    48 /**

    49  *  Use this constructor if you want i/o to be based on

    50  *  a single string you hand in during construction.

    51  *  This short cut was added for Javascript.

    52  *

    53  *  @update  gess 5/12/98

    54  *  @param   aMode represents the parser mode (nav, other)

    55  *  @return

    56  */

    57 nsScanner::nsScanner(const nsAString& anHTMLString)

    58 {

    59   MOZ_COUNT_CTOR(nsScanner);

    61   mSlidingBuffer = nullptr;

    62   mCountRemaining = 0;

    63   mFirstNonWhitespacePosition = -1;

    64   if (AppendToBuffer(anHTMLString)) {

    65     mSlidingBuffer->BeginReading(mCurrentPosition);

    66   } else {

    67     /* XXX see hack below, re: bug 182067 */

    68     memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));

    69     mEndPosition = mCurrentPosition;

    70   }

    71   mMarkPosition = mCurrentPosition;

    72   mIncremental = false;

    73   mUnicodeDecoder = 0;

    74   mCharsetSource = kCharsetUninitialized;

    75   mHasInvalidCharacter = false;

    76   mReplacementCharacter = char16_t(0x0);

    77 }

    79 /**

    80  *  Use this constructor if you want i/o to be based on strings

    81  *  the scanner receives. If you pass a null filename, you

    82  *  can still provide data to the scanner via append.

    83  */

    84 nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)

    85   : mFilename(aFilename)

    86 {

    87   MOZ_COUNT_CTOR(nsScanner);

    88   NS_ASSERTION(!aCreateStream, "This is always true.");

    90   mSlidingBuffer = nullptr;

    92   // XXX This is a big hack.  We need to initialize the iterators to something.

    93   // What matters is that mCurrentPosition == mEndPosition, so that our methods

    94   // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition

    95   // so that we have some hope of catching null pointer dereferences associated

    96   // with this hack. --darin

    97   memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));

    98   mMarkPosition = mCurrentPosition;

    99   mEndPosition = mCurrentPosition;

   101   mIncremental = true;

   102   mFirstNonWhitespacePosition = -1;

   103   mCountRemaining = 0;

   105   mUnicodeDecoder = 0;

   106   mCharsetSource = kCharsetUninitialized;

   107   mHasInvalidCharacter = false;

   108   mReplacementCharacter = char16_t(0x0);

   109   // XML defaults to UTF-8 and about:blank is UTF-8, too.

   110   SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);

   111 }

   113 nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)

   114 {

   115   if (aSource < mCharsetSource) // priority is lower than the current one

   116     return NS_OK;

   118   mCharsetSource = aSource;

   120   nsCString charsetName;

   121   mozilla::DebugOnly<bool> valid =

   122       EncodingUtils::FindEncodingForLabel(aCharset, charsetName);

   123   MOZ_ASSERT(valid, "Should never call with a bogus aCharset.");

   125   if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {

   126     return NS_OK; // no difference, don't change it

   127   }

   129   // different, need to change it

   131   mCharset.Assign(charsetName);

   133   mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);

   134   mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);

   136   return NS_OK;

   137 }

   140 /**

   141  *  default destructor

   142  *

   143  *  @update  gess 3/25/98

   144  *  @param

   145  *  @return

   146  */

   147 nsScanner::~nsScanner() {

   149   delete mSlidingBuffer;

   151   MOZ_COUNT_DTOR(nsScanner);

   152 }

   154 /**

   155  *  Resets current offset position of input stream to marked position.

   156  *  This allows us to back up to this point if the need should arise,

   157  *  such as when tokenization gets interrupted.

   158  *  NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!

   159  *

   160  *  @update  gess 5/12/98

   161  *  @param

   162  *  @return

   163  */

   164 void nsScanner::RewindToMark(void){

   165   if (mSlidingBuffer) {

   166     mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));

   167     mCurrentPosition = mMarkPosition;

   168   }

   169 }

   172 /**

   173  *  Records current offset position in input stream. This allows us

   174  *  to back up to this point if the need should arise, such as when

   175  *  tokenization gets interrupted.

   176  *

   177  *  @update  gess 7/29/98

   178  *  @param

   179  *  @return

   180  */

   181 int32_t nsScanner::Mark() {

   182   int32_t distance = 0;

   183   if (mSlidingBuffer) {

   184     nsScannerIterator oldStart;

   185     mSlidingBuffer->BeginReading(oldStart);

   187     distance = Distance(oldStart, mCurrentPosition);

   189     mSlidingBuffer->DiscardPrefix(mCurrentPosition);

   190     mSlidingBuffer->BeginReading(mCurrentPosition);

   191     mMarkPosition = mCurrentPosition;

   192   }

   194   return distance;

   195 }

   197 /**

   198  * Insert data to our underlying input buffer as

   199  * if it were read from an input stream.

   200  *

   201  * @update  harishd 01/12/99

   202  * @return  error code

   203  */

   204 bool nsScanner::UngetReadable(const nsAString& aBuffer) {

   205   if (!mSlidingBuffer) {

   206     return false;

   207   }

   209   mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);

   210   mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators

   211   mSlidingBuffer->EndReading(mEndPosition);

   213   uint32_t length = aBuffer.Length();

   214   mCountRemaining += length; // Ref. bug 117441

   215   return true;

   216 }

   218 /**

   219  * Append data to our underlying input buffer as

   220  * if it were read from an input stream.

   221  *

   222  * @update  gess4/3/98

   223  * @return  error code

   224  */

   225 nsresult nsScanner::Append(const nsAString& aBuffer) {

   226   if (!AppendToBuffer(aBuffer))

   227     return NS_ERROR_OUT_OF_MEMORY;

   228   return NS_OK;

   229 }

   231 /**

   232  *

   233  *

   234  *  @update  gess 5/21/98

   235  *  @param

   236  *  @return

   237  */

   238 nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen,

   239                            nsIRequest *aRequest)

   240 {

   241   nsresult res = NS_OK;

   242   if (mUnicodeDecoder) {

   243     int32_t unicharBufLen = 0;

   244     mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);

   245     nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);

   246     NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);

   247     char16_t *unichars = buffer->DataStart();

   249     int32_t totalChars = 0;

   250     int32_t unicharLength = unicharBufLen;

   251     int32_t errorPos = -1;

   253     do {

   254       int32_t srcLength = aLen;

   255       res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);

   257       totalChars += unicharLength;

   258       // Continuation of failure case

   259       if(NS_FAILED(res)) {

   260         // if we failed, we consume one byte, replace it with the replacement

   261         // character and try the conversion again.

   263         // This is only needed because some decoders don't follow the

   264         // nsIUnicodeDecoder contract: they return a failure when *aDestLength

   265         // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177

   266         if ((unichars + unicharLength) >= buffer->DataEnd()) {

   267           NS_ERROR("Unexpected end of destination buffer");

   268           break;

   269         }

   271         if (mReplacementCharacter == 0x0 && errorPos == -1) {

   272           errorPos = totalChars;

   273         }

   274         unichars[unicharLength++] = mReplacementCharacter == 0x0 ?

   275                                     mUnicodeDecoder->GetCharacterForUnMapped() :

   276                                     mReplacementCharacter;

   278         unichars = unichars + unicharLength;

   279         unicharLength = unicharBufLen - (++totalChars);

   281         mUnicodeDecoder->Reset();

   283         if(((uint32_t) (srcLength + 1)) > aLen) {

   284           srcLength = aLen;

   285         }

   286         else {

   287           ++srcLength;

   288         }

   290         aBuffer += srcLength;

   291         aLen -= srcLength;

   292       }

   293     } while (NS_FAILED(res) && (aLen > 0));

   295     buffer->SetDataLength(totalChars);

   296     // Don't propagate return code of unicode decoder

   297     // since it doesn't reflect on our success or failure

   298     // - Ref. bug 87110

   299     res = NS_OK;

   300     if (!AppendToBuffer(buffer, aRequest, errorPos))

   301       res = NS_ERROR_OUT_OF_MEMORY;

   302   }

   303   else {

   304     NS_WARNING("No decoder found.");

   305     res = NS_ERROR_FAILURE;

   306   }

   308   return res;

   309 }

   311 /**

   312  *  retrieve next char from scanners internal input stream

   313  *

   314  *  @update  gess 3/25/98

   315  *  @param

   316  *  @return  error code reflecting read status

   317  */

   318 nsresult nsScanner::GetChar(char16_t& aChar) {

   319   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {

   320     aChar = 0;

   321     return kEOF;

   322   }

   324   aChar = *mCurrentPosition++;

   325   --mCountRemaining;

   327   return NS_OK;

   328 }

   331 /**

   332  *  peek ahead to consume next char from scanner's internal

   333  *  input buffer

   334  *

   335  *  @update  gess 3/25/98

   336  *  @param

   337  *  @return

   338  */

   339 nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {

   340   aChar = 0;

   342   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {

   343     return kEOF;

   344   }

   346   if (aOffset > 0) {

   347     if (mCountRemaining <= aOffset)

   348       return kEOF;

   350     nsScannerIterator pos = mCurrentPosition;

   351     pos.advance(aOffset);

   352     aChar=*pos;

   353   }

   354   else {

   355     aChar=*mCurrentPosition;

   356   }

   358   return NS_OK;

   359 }

   361 nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)

   362 {

   363   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {

   364     return kEOF;

   365   }

   367   nsScannerIterator start, end;

   369   start = mCurrentPosition;

   371   if ((int32_t)mCountRemaining <= aOffset) {

   372     return kEOF;

   373   }

   375   if (aOffset > 0) {

   376     start.advance(aOffset);

   377   }

   379   if (mCountRemaining < uint32_t(aNumChars + aOffset)) {

   380     end = mEndPosition;

   381   }

   382   else {

   383     end = start;

   384     end.advance(aNumChars);

   385   }

   387   CopyUnicodeTo(start, end, aStr);

   389   return NS_OK;

   390 }

   393 /**

   394  *  Skip whitespace on scanner input stream

   395  *

   396  *  @update  gess 3/25/98

   397  *  @param

   398  *  @return  error status

   399  */

   400 nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped) {

   402   if (!mSlidingBuffer) {

   403     return kEOF;

   404   }

   406   char16_t theChar = 0;

   407   nsresult  result = Peek(theChar);

   409   if (NS_FAILED(result)) {

   410     return result;

   411   }

   413   nsScannerIterator current = mCurrentPosition;

   414   bool      done = false;

   415   bool      skipped = false;

   417   while (!done && current != mEndPosition) {

   418     switch(theChar) {

   419       case '\n':

   420       case '\r': ++aNewlinesSkipped;

   421       case ' ' :

   422       case '\t':

   423         {

   424           skipped = true;

   425           char16_t thePrevChar = theChar;

   426           theChar = (++current != mEndPosition) ? *current : '\0';

   427           if ((thePrevChar == '\r' && theChar == '\n') ||

   428               (thePrevChar == '\n' && theChar == '\r')) {

   429             theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF

   430           }

   431         }

   432         break;

   433       default:

   434         done = true;

   435         break;

   436     }

   437   }

   439   if (skipped) {

   440     SetPosition(current);

   441     if (current == mEndPosition) {

   442       result = kEOF;

   443     }

   444   }

   446   return result;

   447 }

   449 /**

   450  *  Skip over chars as long as they equal given char

   451  *

   452  *  @update  gess 3/25/98

   453  *  @param

   454  *  @return  error code

   455  */

   456 nsresult nsScanner::SkipOver(char16_t aSkipChar){

   458   if (!mSlidingBuffer) {

   459     return kEOF;

   460   }

   462   char16_t ch=0;

   463   nsresult   result=NS_OK;

   465   while(NS_OK==result) {

   466     result=Peek(ch);

   467     if(NS_OK == result) {

   468       if(ch!=aSkipChar) {

   469         break;

   470       }

   471       GetChar(ch);

   472     }

   473     else break;

   474   } //while

   475   return result;

   477 }

   479 #if 0

   480 void DoErrTest(nsString& aString) {

   481   int32_t pos=aString.FindChar(0);

   482   if(kNotFound<pos) {

   483     if(aString.Length()-1!=pos) {

   484     }

   485   }

   486 }

   488 void DoErrTest(nsCString& aString) {

   489   int32_t pos=aString.FindChar(0);

   490   if(kNotFound<pos) {

   491     if(aString.Length()-1!=pos) {

   492     }

   493   }

   494 }

   495 #endif

   497 /**

   498  *  Consume characters until you run into space, a '<', a '>', or a '/'.

   499  *

   500  *  @param   aString - receives new data from stream

   501  *  @return  error code

   502  */

   503 nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) {

   505   if (!mSlidingBuffer) {

   506     return kEOF;

   507   }

   509   char16_t         theChar=0;

   510   nsresult          result=Peek(theChar);

   511   nsScannerIterator current, end;

   512   bool              found=false;

   514   current = mCurrentPosition;

   515   end = mEndPosition;

   517   // Loop until we find an illegal character. Everything is then appended

   518   // later.

   519   while(current != end && !found) {

   520     theChar=*current;

   522     switch(theChar) {

   523       case '\n':

   524       case '\r':

   525       case ' ' :

   526       case '\t':

   527       case '\v':

   528       case '\f':

   529       case '<':

   530       case '>':

   531       case '/':

   532         found = true;

   533         break;

   535       case '\0':

   536         ReplaceCharacter(current, sInvalid);

   537         break;

   539       default:

   540         break;

   541     }

   543     if (!found) {

   544       ++current;

   545     }

   546   }

   548   // Don't bother appending nothing.

   549   if (current != mCurrentPosition) {

   550     AppendUnicodeTo(mCurrentPosition, current, aString);

   551   }

   553   SetPosition(current);

   554   if (current == end) {

   555     result = kEOF;

   556   }

   558   //DoErrTest(aString);

   560   return result;

   561 }

   563 /**

   564  *  Consume characters until you run into a char that's not valid in an

   565  *  entity name

   566  *

   567  *  @param   aString - receives new data from stream

   568  *  @return  error code

   569  */

   570 nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {

   572   if (!mSlidingBuffer) {

   573     return kEOF;

   574   }

   576   char16_t         theChar=0;

   577   nsresult          result=Peek(theChar);

   578   nsScannerIterator origin, current, end;

   579   bool              found=false;

   581   origin = mCurrentPosition;

   582   current = mCurrentPosition;

   583   end = mEndPosition;

   585   while(current != end) {

   587     theChar=*current;

   588     if(theChar) {

   589       found=false;

   590       switch(theChar) {

   591         case '_':

   592         case '-':

   593         case '.':

   594           // Don't allow ':' in entity names.  See bug 23791

   595           found = true;

   596           break;

   597         default:

   598           found = ('a'<=theChar && theChar<='z') ||

   599                   ('A'<=theChar && theChar<='Z') ||

   600                   ('0'<=theChar && theChar<='9');

   601           break;

   602       }

   604       if(!found) {

   605         AppendUnicodeTo(mCurrentPosition, current, aString);

   606         break;

   607       }

   608     }

   609     ++current;

   610   }

   612   SetPosition(current);

   613   if (current == end) {

   614     AppendUnicodeTo(origin, current, aString);

   615     return kEOF;

   616   }

   618   //DoErrTest(aString);

   620   return result;

   621 }

   623 /**

   624  *  Consume digits

   625  *

   626  *  @param   aString - should contain digits

   627  *  @return  error code

   628  */

   629 nsresult nsScanner::ReadNumber(nsString& aString,int32_t aBase) {

   631   if (!mSlidingBuffer) {

   632     return kEOF;

   633   }

   635   NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");

   637   char16_t         theChar=0;

   638   nsresult          result=Peek(theChar);

   639   nsScannerIterator origin, current, end;

   641   origin = mCurrentPosition;

   642   current = origin;

   643   end = mEndPosition;

   645   bool done = false;

   646   while(current != end) {

   647     theChar=*current;

   648     if(theChar) {

   649       done = (theChar < '0' || theChar > '9') &&

   650              ((aBase == 16)? (theChar < 'A' || theChar > 'F') &&

   651                              (theChar < 'a' || theChar > 'f')

   652                              :true);

   653       if(done) {

   654         AppendUnicodeTo(origin, current, aString);

   655         break;

   656       }

   657     }

   658     ++current;

   659   }

   661   SetPosition(current);

   662   if (current == end) {

   663     AppendUnicodeTo(origin, current, aString);

   664     return kEOF;

   665   }

   667   //DoErrTest(aString);

   669   return result;

   670 }

   672 /**

   673  *  Consume characters until you find the terminal char

   674  *

   675  *  @update  gess 3/25/98

   676  *  @param   aString receives new data from stream

   677  *  @param   addTerminal tells us whether to append terminal to aString

   678  *  @return  error code

   679  */

   680 nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,

   681                                    int32_t& aNewlinesSkipped,

   682                                    bool& aHaveCR) {

   684   aHaveCR = false;

   686   if (!mSlidingBuffer) {

   687     return kEOF;

   688   }

   690   char16_t theChar = 0;

   691   nsresult  result = Peek(theChar);

   693   if (NS_FAILED(result)) {

   694     return result;

   695   }

   697   nsScannerIterator origin, current, end;

   698   bool done = false;

   700   origin = mCurrentPosition;

   701   current = origin;

   702   end = mEndPosition;

   704   bool haveCR = false;

   706   while(!done && current != end) {

   707     switch(theChar) {

   708       case '\n':

   709       case '\r':

   710         {

   711           ++aNewlinesSkipped;

   712           char16_t thePrevChar = theChar;

   713           theChar = (++current != end) ? *current : '\0';

   714           if ((thePrevChar == '\r' && theChar == '\n') ||

   715               (thePrevChar == '\n' && theChar == '\r')) {

   716             theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF

   717             haveCR = true;

   718           } else if (thePrevChar == '\r') {

   719             // Lone CR becomes CRLF; callers should know to remove extra CRs

   720             AppendUnicodeTo(origin, current, aString);

   721             aString.writable().Append(char16_t('\n'));

   722             origin = current;

   723             haveCR = true;

   724           }

   725         }

   726         break;

   727       case ' ' :

   728       case '\t':

   729         theChar = (++current != end) ? *current : '\0';

   730         break;

   731       default:

   732         done = true;

   733         AppendUnicodeTo(origin, current, aString);

   734         break;

   735     }

   736   }

   738   SetPosition(current);

   739   if (current == end) {

   740     AppendUnicodeTo(origin, current, aString);

   741     result = kEOF;

   742   }

   744   aHaveCR = haveCR;

   745   return result;

   746 }

   748 //XXXbz callers of this have to manage their lone '\r' themselves if they want

   749 //it to work.  Good thing they're all in view-source and it deals.

   750 nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart,

   751                                    nsScannerIterator& aEnd,

   752                                    int32_t& aNewlinesSkipped) {

   754   if (!mSlidingBuffer) {

   755     return kEOF;

   756   }

   758   char16_t theChar = 0;

   759   nsresult  result = Peek(theChar);

   761   if (NS_FAILED(result)) {

   762     return result;

   763   }

   765   nsScannerIterator origin, current, end;

   766   bool done = false;

   768   origin = mCurrentPosition;

   769   current = origin;

   770   end = mEndPosition;

   772   while(!done && current != end) {

   773     switch(theChar) {

   774       case '\n':

   775       case '\r': ++aNewlinesSkipped;

   776       case ' ' :

   777       case '\t':

   778         {

   779           char16_t thePrevChar = theChar;

   780           theChar = (++current != end) ? *current : '\0';

   781           if ((thePrevChar == '\r' && theChar == '\n') ||

   782               (thePrevChar == '\n' && theChar == '\r')) {

   783             theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF

   784           }

   785         }

   786         break;

   787       default:

   788         done = true;

   789         aStart = origin;

   790         aEnd = current;

   791         break;

   792     }

   793   }

   795   SetPosition(current);

   796   if (current == end) {

   797     aStart = origin;

   798     aEnd = current;

   799     result = kEOF;

   800   }

   802   return result;

   803 }

   805 /**

   806  *  Consume characters until you encounter one contained in given

   807  *  input set.

   808  *

   809  *  @update  gess 3/25/98

   810  *  @param   aString will contain the result of this method

   811  *  @param   aTerminalSet is an ordered string that contains

   812  *           the set of INVALID characters

   813  *  @return  error code

   814  */

   815 nsresult nsScanner::ReadUntil(nsAString& aString,

   816                               const nsReadEndCondition& aEndCondition,

   817                               bool addTerminal)

   818 {

   819   if (!mSlidingBuffer) {

   820     return kEOF;

   821   }

   823   nsScannerIterator origin, current;

   824   const char16_t* setstart = aEndCondition.mChars;

   825   const char16_t* setcurrent;

   827   origin = mCurrentPosition;

   828   current = origin;

   830   char16_t         theChar=0;

   831   nsresult          result=Peek(theChar);

   833   if (NS_FAILED(result)) {

   834     return result;

   835   }

   837   while (current != mEndPosition) {

   838     theChar = *current;

   839     if (theChar == '\0') {

   840       ReplaceCharacter(current, sInvalid);

   841       theChar = sInvalid;

   842     }

   844     // Filter out completely wrong characters

   845     // Check if all bits are in the required area

   846     if(!(theChar & aEndCondition.mFilter)) {

   847       // They were. Do a thorough check.

   849       setcurrent = setstart;

   850       while (*setcurrent) {

   851         if (*setcurrent == theChar) {

   852           if(addTerminal)

   853             ++current;

   854           AppendUnicodeTo(origin, current, aString);

   855           SetPosition(current);

   857           //DoErrTest(aString);

   859           return NS_OK;

   860         }

   861         ++setcurrent;

   862       }

   863     }

   865     ++current;

   866   }

   868   // If we are here, we didn't find any terminator in the string and

   869   // current = mEndPosition

   870   SetPosition(current);

   871   AppendUnicodeTo(origin, current, aString);

   872   return kEOF;

   873 }

   875 nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,

   876                               const nsReadEndCondition& aEndCondition,

   877                               bool addTerminal)

   878 {

   879   if (!mSlidingBuffer) {

   880     return kEOF;

   881   }

   883   nsScannerIterator origin, current;

   884   const char16_t* setstart = aEndCondition.mChars;

   885   const char16_t* setcurrent;

   887   origin = mCurrentPosition;

   888   current = origin;

   890   char16_t         theChar=0;

   891   nsresult          result=Peek(theChar);

   893   if (NS_FAILED(result)) {

   894     return result;

   895   }

   897   while (current != mEndPosition) {

   898     theChar = *current;

   899     if (theChar == '\0') {

   900       ReplaceCharacter(current, sInvalid);

   901       theChar = sInvalid;

   902     }

   904     // Filter out completely wrong characters

   905     // Check if all bits are in the required area

   906     if(!(theChar & aEndCondition.mFilter)) {

   907       // They were. Do a thorough check.

   909       setcurrent = setstart;

   910       while (*setcurrent) {

   911         if (*setcurrent == theChar) {

   912           if(addTerminal)

   913             ++current;

   914           AppendUnicodeTo(origin, current, aString);

   915           SetPosition(current);

   917           //DoErrTest(aString);

   919           return NS_OK;

   920         }

   921         ++setcurrent;

   922       }

   923     }

   925     ++current;

   926   }

   928   // If we are here, we didn't find any terminator in the string and

   929   // current = mEndPosition

   930   SetPosition(current);

   931   AppendUnicodeTo(origin, current, aString);

   932   return kEOF;

   933 }

   935 nsresult nsScanner::ReadUntil(nsScannerIterator& aStart,

   936                               nsScannerIterator& aEnd,

   937                               const nsReadEndCondition &aEndCondition,

   938                               bool addTerminal)

   939 {

   940   if (!mSlidingBuffer) {

   941     return kEOF;

   942   }

   944   nsScannerIterator origin, current;

   945   const char16_t* setstart = aEndCondition.mChars;

   946   const char16_t* setcurrent;

   948   origin = mCurrentPosition;

   949   current = origin;

   951   char16_t         theChar=0;

   952   nsresult          result=Peek(theChar);

   954   if (NS_FAILED(result)) {

   955     aStart = aEnd = current;

   956     return result;

   957   }

   959   while (current != mEndPosition) {

   960     theChar = *current;

   961     if (theChar == '\0') {

   962       ReplaceCharacter(current, sInvalid);

   963       theChar = sInvalid;

   964     }

   966     // Filter out completely wrong characters

   967     // Check if all bits are in the required area

   968     if(!(theChar & aEndCondition.mFilter)) {

   969       // They were. Do a thorough check.

   970       setcurrent = setstart;

   971       while (*setcurrent) {

   972         if (*setcurrent == theChar) {

   973           if(addTerminal)

   974             ++current;

   975           aStart = origin;

   976           aEnd = current;

   977           SetPosition(current);

   979           return NS_OK;

   980         }

   981         ++setcurrent;

   982       }

   983     }

   985     ++current;

   986   }

   988   // If we are here, we didn't find any terminator in the string and

   989   // current = mEndPosition

   990   SetPosition(current);

   991   aStart = origin;

   992   aEnd = current;

   993   return kEOF;

   994 }

   996 /**

   997  *  Consumes chars until you see the given terminalChar

   998  *

   999  *  @update  gess 3/25/98

  1000  *  @param

  1001  *  @return  error code

  1002  */

  1003 nsresult nsScanner::ReadUntil(nsAString& aString,

  1004                               char16_t aTerminalChar,

  1005                               bool addTerminal)

  1006 {

  1007   if (!mSlidingBuffer) {

  1008     return kEOF;

  1009   }

  1011   nsScannerIterator origin, current;

  1013   origin = mCurrentPosition;

  1014   current = origin;

  1016   char16_t theChar;

  1017   nsresult result = Peek(theChar);

  1019   if (NS_FAILED(result)) {

  1020     return result;

  1021   }

  1023   while (current != mEndPosition) {

  1024     theChar = *current;

  1025     if (theChar == '\0') {

  1026       ReplaceCharacter(current, sInvalid);

  1027       theChar = sInvalid;

  1028     }

  1030     if (aTerminalChar == theChar) {

  1031       if(addTerminal)

  1032         ++current;

  1033       AppendUnicodeTo(origin, current, aString);

  1034       SetPosition(current);

  1035       return NS_OK;

  1036     }

  1037     ++current;

  1038   }

  1040   // If we are here, we didn't find any terminator in the string and

  1041   // current = mEndPosition

  1042   AppendUnicodeTo(origin, current, aString);

  1043   SetPosition(current);

  1044   return kEOF;

  1046 }

  1048 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)

  1049 {

  1050   aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);

  1051 }

  1053 void nsScanner::CurrentPosition(nsScannerIterator& aPosition)

  1054 {

  1055   aPosition = mCurrentPosition;

  1056 }

  1058 void nsScanner::EndReading(nsScannerIterator& aPosition)

  1059 {

  1060   aPosition = mEndPosition;

  1061 }

  1063 void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse)

  1064 {

  1065   if (mSlidingBuffer) {

  1066 #ifdef DEBUG

  1067     uint32_t origRemaining = mCountRemaining;

  1068 #endif

  1070     if (aReverse) {

  1071       mCountRemaining += (Distance(aPosition, mCurrentPosition));

  1072     }

  1073     else {

  1074       mCountRemaining -= (Distance(mCurrentPosition, aPosition));

  1075     }

  1077     NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) ||

  1078                  (mCountRemaining <= origRemaining && !aReverse),

  1079                  "Improper use of nsScanner::SetPosition. Make sure to set the"

  1080                  " aReverse parameter correctly");

  1082     mCurrentPosition = aPosition;

  1083     if (aTerminate && (mCurrentPosition == mEndPosition)) {

  1084       mMarkPosition = mCurrentPosition;

  1085       mSlidingBuffer->DiscardPrefix(mCurrentPosition);

  1086     }

  1087   }

  1088 }

  1090 void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,

  1091                                  char16_t aChar)

  1092 {

  1093   if (mSlidingBuffer) {

  1094     mSlidingBuffer->ReplaceCharacter(aPosition, aChar);

  1095   }

  1096 }

  1098 bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,

  1099                                  nsIRequest *aRequest,

  1100                                  int32_t aErrorPos)

  1101 {

  1102   uint32_t countRemaining = mCountRemaining;

  1103   if (!mSlidingBuffer) {

  1104     mSlidingBuffer = new nsScannerString(aBuf);

  1105     if (!mSlidingBuffer)

  1106       return false;

  1107     mSlidingBuffer->BeginReading(mCurrentPosition);

  1108     mMarkPosition = mCurrentPosition;

  1109     mSlidingBuffer->EndReading(mEndPosition);

  1110     mCountRemaining = aBuf->DataLength();

  1111   }

  1112   else {

  1113     mSlidingBuffer->AppendBuffer(aBuf);

  1114     if (mCurrentPosition == mEndPosition) {

  1115       mSlidingBuffer->BeginReading(mCurrentPosition);

  1116     }

  1117     mSlidingBuffer->EndReading(mEndPosition);

  1118     mCountRemaining += aBuf->DataLength();

  1119   }

  1121   if (aErrorPos != -1 && !mHasInvalidCharacter) {

  1122     mHasInvalidCharacter = true;

  1123     mFirstInvalidPosition = mCurrentPosition;

  1124     mFirstInvalidPosition.advance(countRemaining + aErrorPos);

  1125   }

  1127   if (mFirstNonWhitespacePosition == -1) {

  1128     nsScannerIterator iter(mCurrentPosition);

  1129     nsScannerIterator end(mEndPosition);

  1131     while (iter != end) {

  1132       if (!nsCRT::IsAsciiSpace(*iter)) {

  1133         mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);

  1135         break;

  1136       }

  1138       ++iter;

  1139     }

  1140   }

  1141   return true;

  1142 }

  1144 /**

  1145  *  call this to copy bytes out of the scanner that have not yet been consumed

  1146  *  by the tokenization process.

  1147  *

  1148  *  @update  gess 5/12/98

  1149  *  @param   aCopyBuffer is where the scanner buffer will be copied to

  1150  *  @return  nada

  1151  */

  1152 void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {

  1153   if (!mSlidingBuffer) {

  1154     aCopyBuffer.Truncate();

  1155     return;

  1156   }

  1158   nsScannerIterator start, end;

  1159   start = mCurrentPosition;

  1160   end = mEndPosition;

  1162   CopyUnicodeTo(start, end, aCopyBuffer);

  1163 }

  1165 /**

  1166  *  Retrieve the name of the file that the scanner is reading from.

  1167  *  In some cases, it's just a given name, because the scanner isn't

  1168  *  really reading from a file.

  1169  *

  1170  *  @update  gess 5/12/98

  1171  *  @return

  1172  */

  1173 nsString& nsScanner::GetFilename(void) {

  1174   return mFilename;

  1175 }

  1177 /**

  1178  *  Conduct self test. Actually, selftesting for this class

  1179  *  occurs in the parser selftest.

  1180  *

  1181  *  @update  gess 3/25/98

  1182  *  @param

  1183  *  @return

  1184  */

  1186 void nsScanner::SelfTest(void) {

  1187 #ifdef _DEBUG

  1188 #endif

  1189 }

  1191 void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter)

  1192 {

  1193   mReplacementCharacter = aReplacementCharacter;

  1195   if (mHasInvalidCharacter) {

  1196     ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);

  1197   }

  1198 }

The Tor Browser / file revision

parser/htmlparser/src/nsScanner.cpp@6474c204b198

parser/htmlparser/src/nsScanner.cpp