parser/htmlparser/src/nsScanner.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=2 sw=2 et tw=78: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 //#define __INCREMENTAL 1
     9 #include "mozilla/DebugOnly.h"
    11 #include "nsScanner.h"
    12 #include "nsDebug.h"
    13 #include "nsReadableUtils.h"
    14 #include "nsIInputStream.h"
    15 #include "nsIFile.h"
    16 #include "nsNetUtil.h"
    17 #include "nsUTF8Utils.h" // for LossyConvertEncoding
    18 #include "nsCRT.h"
    19 #include "nsParser.h"
    20 #include "nsCharsetSource.h"
    22 #include "mozilla/dom/EncodingUtils.h"
    24 using mozilla::dom::EncodingUtils;
    26 // We replace NUL characters with this character.
    27 static char16_t sInvalid = UCS2_REPLACEMENT_CHAR;
    29 nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) :
    30   mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set
    31 {
    32   // Build filter that will be used to filter out characters with
    33   // bits that none of the terminal chars have. This works very well
    34   // because terminal chars often have only the last 4-6 bits set and
    35   // normal ascii letters have bit 7 set. Other letters have even higher
    36   // bits set.
    38   // Calculate filter
    39   const char16_t *current = aTerminateChars;
    40   char16_t terminalChar = *current;
    41   while (terminalChar) {
    42     mFilter &= ~terminalChar;
    43     ++current;
    44     terminalChar = *current;
    45   }
    46 }
    48 /**
    49  *  Use this constructor if you want i/o to be based on 
    50  *  a single string you hand in during construction.
    51  *  This short cut was added for Javascript.
    52  *
    53  *  @update  gess 5/12/98
    54  *  @param   aMode represents the parser mode (nav, other)
    55  *  @return  
    56  */
    57 nsScanner::nsScanner(const nsAString& anHTMLString)
    58 {
    59   MOZ_COUNT_CTOR(nsScanner);
    61   mSlidingBuffer = nullptr;
    62   mCountRemaining = 0;
    63   mFirstNonWhitespacePosition = -1;
    64   if (AppendToBuffer(anHTMLString)) {
    65     mSlidingBuffer->BeginReading(mCurrentPosition);
    66   } else {
    67     /* XXX see hack below, re: bug 182067 */
    68     memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
    69     mEndPosition = mCurrentPosition;
    70   }
    71   mMarkPosition = mCurrentPosition;
    72   mIncremental = false;
    73   mUnicodeDecoder = 0;
    74   mCharsetSource = kCharsetUninitialized;
    75   mHasInvalidCharacter = false;
    76   mReplacementCharacter = char16_t(0x0);
    77 }
    79 /**
    80  *  Use this constructor if you want i/o to be based on strings 
    81  *  the scanner receives. If you pass a null filename, you
    82  *  can still provide data to the scanner via append.
    83  */
    84 nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
    85   : mFilename(aFilename)
    86 {
    87   MOZ_COUNT_CTOR(nsScanner);
    88   NS_ASSERTION(!aCreateStream, "This is always true.");
    90   mSlidingBuffer = nullptr;
    92   // XXX This is a big hack.  We need to initialize the iterators to something.
    93   // What matters is that mCurrentPosition == mEndPosition, so that our methods
    94   // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
    95   // so that we have some hope of catching null pointer dereferences associated
    96   // with this hack. --darin
    97   memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
    98   mMarkPosition = mCurrentPosition;
    99   mEndPosition = mCurrentPosition;
   101   mIncremental = true;
   102   mFirstNonWhitespacePosition = -1;
   103   mCountRemaining = 0;
   105   mUnicodeDecoder = 0;
   106   mCharsetSource = kCharsetUninitialized;
   107   mHasInvalidCharacter = false;
   108   mReplacementCharacter = char16_t(0x0);
   109   // XML defaults to UTF-8 and about:blank is UTF-8, too.
   110   SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
   111 }
   113 nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)
   114 {
   115   if (aSource < mCharsetSource) // priority is lower than the current one
   116     return NS_OK;
   118   mCharsetSource = aSource;
   120   nsCString charsetName;
   121   mozilla::DebugOnly<bool> valid =
   122       EncodingUtils::FindEncodingForLabel(aCharset, charsetName);
   123   MOZ_ASSERT(valid, "Should never call with a bogus aCharset.");
   125   if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
   126     return NS_OK; // no difference, don't change it
   127   }
   129   // different, need to change it
   131   mCharset.Assign(charsetName);
   133   mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
   134   mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
   136   return NS_OK;
   137 }
   140 /**
   141  *  default destructor
   142  *  
   143  *  @update  gess 3/25/98
   144  *  @param   
   145  *  @return  
   146  */
   147 nsScanner::~nsScanner() {
   149   delete mSlidingBuffer;
   151   MOZ_COUNT_DTOR(nsScanner);
   152 }
   154 /**
   155  *  Resets current offset position of input stream to marked position. 
   156  *  This allows us to back up to this point if the need should arise, 
   157  *  such as when tokenization gets interrupted.
   158  *  NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
   159  *
   160  *  @update  gess 5/12/98
   161  *  @param   
   162  *  @return  
   163  */
   164 void nsScanner::RewindToMark(void){
   165   if (mSlidingBuffer) {
   166     mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));
   167     mCurrentPosition = mMarkPosition;
   168   }
   169 }
   172 /**
   173  *  Records current offset position in input stream. This allows us
   174  *  to back up to this point if the need should arise, such as when
   175  *  tokenization gets interrupted.
   176  *
   177  *  @update  gess 7/29/98
   178  *  @param   
   179  *  @return  
   180  */
   181 int32_t nsScanner::Mark() {
   182   int32_t distance = 0;
   183   if (mSlidingBuffer) {
   184     nsScannerIterator oldStart;
   185     mSlidingBuffer->BeginReading(oldStart);
   187     distance = Distance(oldStart, mCurrentPosition);
   189     mSlidingBuffer->DiscardPrefix(mCurrentPosition);
   190     mSlidingBuffer->BeginReading(mCurrentPosition);
   191     mMarkPosition = mCurrentPosition;
   192   }
   194   return distance;
   195 }
   197 /** 
   198  * Insert data to our underlying input buffer as
   199  * if it were read from an input stream.
   200  *
   201  * @update  harishd 01/12/99
   202  * @return  error code 
   203  */
   204 bool nsScanner::UngetReadable(const nsAString& aBuffer) {
   205   if (!mSlidingBuffer) {
   206     return false;
   207   }
   209   mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
   210   mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
   211   mSlidingBuffer->EndReading(mEndPosition);
   213   uint32_t length = aBuffer.Length();
   214   mCountRemaining += length; // Ref. bug 117441
   215   return true;
   216 }
   218 /** 
   219  * Append data to our underlying input buffer as
   220  * if it were read from an input stream.
   221  *
   222  * @update  gess4/3/98
   223  * @return  error code 
   224  */
   225 nsresult nsScanner::Append(const nsAString& aBuffer) {
   226   if (!AppendToBuffer(aBuffer))
   227     return NS_ERROR_OUT_OF_MEMORY;
   228   return NS_OK;
   229 }
   231 /**
   232  *  
   233  *  
   234  *  @update  gess 5/21/98
   235  *  @param   
   236  *  @return  
   237  */
   238 nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen,
   239                            nsIRequest *aRequest)
   240 {
   241   nsresult res = NS_OK;
   242   if (mUnicodeDecoder) {
   243     int32_t unicharBufLen = 0;
   244     mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
   245     nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
   246     NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
   247     char16_t *unichars = buffer->DataStart();
   249     int32_t totalChars = 0;
   250     int32_t unicharLength = unicharBufLen;
   251     int32_t errorPos = -1;
   253     do {
   254       int32_t srcLength = aLen;
   255       res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
   257       totalChars += unicharLength;
   258       // Continuation of failure case
   259       if(NS_FAILED(res)) {
   260         // if we failed, we consume one byte, replace it with the replacement
   261         // character and try the conversion again.
   263         // This is only needed because some decoders don't follow the
   264         // nsIUnicodeDecoder contract: they return a failure when *aDestLength
   265         // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177
   266         if ((unichars + unicharLength) >= buffer->DataEnd()) {
   267           NS_ERROR("Unexpected end of destination buffer");
   268           break;
   269         }
   271         if (mReplacementCharacter == 0x0 && errorPos == -1) {
   272           errorPos = totalChars;
   273         }
   274         unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
   275                                     mUnicodeDecoder->GetCharacterForUnMapped() :
   276                                     mReplacementCharacter;
   278         unichars = unichars + unicharLength;
   279         unicharLength = unicharBufLen - (++totalChars);
   281         mUnicodeDecoder->Reset();
   283         if(((uint32_t) (srcLength + 1)) > aLen) {
   284           srcLength = aLen;
   285         }
   286         else {
   287           ++srcLength;
   288         }
   290         aBuffer += srcLength;
   291         aLen -= srcLength;
   292       }
   293     } while (NS_FAILED(res) && (aLen > 0));
   295     buffer->SetDataLength(totalChars);
   296     // Don't propagate return code of unicode decoder
   297     // since it doesn't reflect on our success or failure
   298     // - Ref. bug 87110
   299     res = NS_OK; 
   300     if (!AppendToBuffer(buffer, aRequest, errorPos))
   301       res = NS_ERROR_OUT_OF_MEMORY;
   302   }
   303   else {
   304     NS_WARNING("No decoder found.");
   305     res = NS_ERROR_FAILURE;
   306   }
   308   return res;
   309 }
   311 /**
   312  *  retrieve next char from scanners internal input stream
   313  *  
   314  *  @update  gess 3/25/98
   315  *  @param   
   316  *  @return  error code reflecting read status
   317  */
   318 nsresult nsScanner::GetChar(char16_t& aChar) {
   319   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
   320     aChar = 0;
   321     return kEOF;
   322   }
   324   aChar = *mCurrentPosition++;
   325   --mCountRemaining;
   327   return NS_OK;
   328 }
   331 /**
   332  *  peek ahead to consume next char from scanner's internal
   333  *  input buffer
   334  *  
   335  *  @update  gess 3/25/98
   336  *  @param   
   337  *  @return  
   338  */
   339 nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {
   340   aChar = 0;
   342   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
   343     return kEOF;
   344   }
   346   if (aOffset > 0) {
   347     if (mCountRemaining <= aOffset)
   348       return kEOF;
   350     nsScannerIterator pos = mCurrentPosition;
   351     pos.advance(aOffset);
   352     aChar=*pos;
   353   }
   354   else {
   355     aChar=*mCurrentPosition;
   356   }
   358   return NS_OK;
   359 }
   361 nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
   362 {
   363   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
   364     return kEOF;
   365   }
   367   nsScannerIterator start, end;
   369   start = mCurrentPosition;
   371   if ((int32_t)mCountRemaining <= aOffset) {
   372     return kEOF;
   373   }
   375   if (aOffset > 0) {
   376     start.advance(aOffset);
   377   }
   379   if (mCountRemaining < uint32_t(aNumChars + aOffset)) {
   380     end = mEndPosition;
   381   }
   382   else {
   383     end = start;
   384     end.advance(aNumChars);
   385   }
   387   CopyUnicodeTo(start, end, aStr);
   389   return NS_OK;
   390 }
   393 /**
   394  *  Skip whitespace on scanner input stream
   395  *  
   396  *  @update  gess 3/25/98
   397  *  @param   
   398  *  @return  error status
   399  */
   400 nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped) {
   402   if (!mSlidingBuffer) {
   403     return kEOF;
   404   }
   406   char16_t theChar = 0;
   407   nsresult  result = Peek(theChar);
   409   if (NS_FAILED(result)) {
   410     return result;
   411   }
   413   nsScannerIterator current = mCurrentPosition;
   414   bool      done = false;
   415   bool      skipped = false;
   417   while (!done && current != mEndPosition) {
   418     switch(theChar) {
   419       case '\n':
   420       case '\r': ++aNewlinesSkipped;
   421       case ' ' :
   422       case '\t':
   423         {
   424           skipped = true;
   425           char16_t thePrevChar = theChar;
   426           theChar = (++current != mEndPosition) ? *current : '\0';
   427           if ((thePrevChar == '\r' && theChar == '\n') ||
   428               (thePrevChar == '\n' && theChar == '\r')) {
   429             theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF
   430           }
   431         }
   432         break;
   433       default:
   434         done = true;
   435         break;
   436     }
   437   }
   439   if (skipped) {
   440     SetPosition(current);
   441     if (current == mEndPosition) {
   442       result = kEOF;
   443     }
   444   }
   446   return result;
   447 }
   449 /**
   450  *  Skip over chars as long as they equal given char
   451  *  
   452  *  @update  gess 3/25/98
   453  *  @param   
   454  *  @return  error code
   455  */
   456 nsresult nsScanner::SkipOver(char16_t aSkipChar){
   458   if (!mSlidingBuffer) {
   459     return kEOF;
   460   }
   462   char16_t ch=0;
   463   nsresult   result=NS_OK;
   465   while(NS_OK==result) {
   466     result=Peek(ch);
   467     if(NS_OK == result) {
   468       if(ch!=aSkipChar) {
   469         break;
   470       }
   471       GetChar(ch);
   472     } 
   473     else break;
   474   } //while
   475   return result;
   477 }
   479 #if 0
   480 void DoErrTest(nsString& aString) {
   481   int32_t pos=aString.FindChar(0);
   482   if(kNotFound<pos) {
   483     if(aString.Length()-1!=pos) {
   484     }
   485   }
   486 }
   488 void DoErrTest(nsCString& aString) {
   489   int32_t pos=aString.FindChar(0);
   490   if(kNotFound<pos) {
   491     if(aString.Length()-1!=pos) {
   492     }
   493   }
   494 }
   495 #endif
   497 /**
   498  *  Consume characters until you run into space, a '<', a '>', or a '/'.
   499  *  
   500  *  @param   aString - receives new data from stream
   501  *  @return  error code
   502  */
   503 nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) {
   505   if (!mSlidingBuffer) {
   506     return kEOF;
   507   }
   509   char16_t         theChar=0;
   510   nsresult          result=Peek(theChar);
   511   nsScannerIterator current, end;
   512   bool              found=false;  
   514   current = mCurrentPosition;
   515   end = mEndPosition;
   517   // Loop until we find an illegal character. Everything is then appended
   518   // later.
   519   while(current != end && !found) {
   520     theChar=*current;
   522     switch(theChar) {
   523       case '\n':
   524       case '\r':
   525       case ' ' :
   526       case '\t':
   527       case '\v':
   528       case '\f':
   529       case '<':
   530       case '>':
   531       case '/':
   532         found = true;
   533         break;
   535       case '\0':
   536         ReplaceCharacter(current, sInvalid);
   537         break;
   539       default:
   540         break;
   541     }
   543     if (!found) {
   544       ++current;
   545     }
   546   }
   548   // Don't bother appending nothing.
   549   if (current != mCurrentPosition) {
   550     AppendUnicodeTo(mCurrentPosition, current, aString);
   551   }
   553   SetPosition(current);  
   554   if (current == end) {
   555     result = kEOF;
   556   }
   558   //DoErrTest(aString);
   560   return result;
   561 }
   563 /**
   564  *  Consume characters until you run into a char that's not valid in an
   565  *  entity name
   566  *  
   567  *  @param   aString - receives new data from stream
   568  *  @return  error code
   569  */
   570 nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
   572   if (!mSlidingBuffer) {
   573     return kEOF;
   574   }
   576   char16_t         theChar=0;
   577   nsresult          result=Peek(theChar);
   578   nsScannerIterator origin, current, end;
   579   bool              found=false;  
   581   origin = mCurrentPosition;
   582   current = mCurrentPosition;
   583   end = mEndPosition;
   585   while(current != end) {
   587     theChar=*current;
   588     if(theChar) {
   589       found=false;
   590       switch(theChar) {
   591         case '_':
   592         case '-':
   593         case '.':
   594           // Don't allow ':' in entity names.  See bug 23791
   595           found = true;
   596           break;
   597         default:
   598           found = ('a'<=theChar && theChar<='z') ||
   599                   ('A'<=theChar && theChar<='Z') ||
   600                   ('0'<=theChar && theChar<='9');
   601           break;
   602       }
   604       if(!found) {
   605         AppendUnicodeTo(mCurrentPosition, current, aString);
   606         break;
   607       }
   608     }
   609     ++current;
   610   }
   612   SetPosition(current);
   613   if (current == end) {
   614     AppendUnicodeTo(origin, current, aString);
   615     return kEOF;
   616   }
   618   //DoErrTest(aString);
   620   return result;
   621 }
   623 /**
   624  *  Consume digits 
   625  *  
   626  *  @param   aString - should contain digits
   627  *  @return  error code
   628  */
   629 nsresult nsScanner::ReadNumber(nsString& aString,int32_t aBase) {
   631   if (!mSlidingBuffer) {
   632     return kEOF;
   633   }
   635   NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
   637   char16_t         theChar=0;
   638   nsresult          result=Peek(theChar);
   639   nsScannerIterator origin, current, end;
   641   origin = mCurrentPosition;
   642   current = origin;
   643   end = mEndPosition;
   645   bool done = false;
   646   while(current != end) {
   647     theChar=*current;
   648     if(theChar) {
   649       done = (theChar < '0' || theChar > '9') && 
   650              ((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
   651                              (theChar < 'a' || theChar > 'f')
   652                              :true);
   653       if(done) {
   654         AppendUnicodeTo(origin, current, aString);
   655         break;
   656       }
   657     }
   658     ++current;
   659   }
   661   SetPosition(current);
   662   if (current == end) {
   663     AppendUnicodeTo(origin, current, aString);
   664     return kEOF;
   665   }
   667   //DoErrTest(aString);
   669   return result;
   670 }
   672 /**
   673  *  Consume characters until you find the terminal char
   674  *  
   675  *  @update  gess 3/25/98
   676  *  @param   aString receives new data from stream
   677  *  @param   addTerminal tells us whether to append terminal to aString
   678  *  @return  error code
   679  */
   680 nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,
   681                                    int32_t& aNewlinesSkipped,
   682                                    bool& aHaveCR) {
   684   aHaveCR = false;
   686   if (!mSlidingBuffer) {
   687     return kEOF;
   688   }
   690   char16_t theChar = 0;
   691   nsresult  result = Peek(theChar);
   693   if (NS_FAILED(result)) {
   694     return result;
   695   }
   697   nsScannerIterator origin, current, end;
   698   bool done = false;  
   700   origin = mCurrentPosition;
   701   current = origin;
   702   end = mEndPosition;
   704   bool haveCR = false;
   706   while(!done && current != end) {
   707     switch(theChar) {
   708       case '\n':
   709       case '\r':
   710         {
   711           ++aNewlinesSkipped;
   712           char16_t thePrevChar = theChar;
   713           theChar = (++current != end) ? *current : '\0';
   714           if ((thePrevChar == '\r' && theChar == '\n') ||
   715               (thePrevChar == '\n' && theChar == '\r')) {
   716             theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
   717             haveCR = true;
   718           } else if (thePrevChar == '\r') {
   719             // Lone CR becomes CRLF; callers should know to remove extra CRs
   720             AppendUnicodeTo(origin, current, aString);
   721             aString.writable().Append(char16_t('\n'));
   722             origin = current;
   723             haveCR = true;
   724           }
   725         }
   726         break;
   727       case ' ' :
   728       case '\t':
   729         theChar = (++current != end) ? *current : '\0';
   730         break;
   731       default:
   732         done = true;
   733         AppendUnicodeTo(origin, current, aString);
   734         break;
   735     }
   736   }
   738   SetPosition(current);
   739   if (current == end) {
   740     AppendUnicodeTo(origin, current, aString);
   741     result = kEOF;
   742   }
   744   aHaveCR = haveCR;
   745   return result;
   746 }
   748 //XXXbz callers of this have to manage their lone '\r' themselves if they want
   749 //it to work.  Good thing they're all in view-source and it deals.
   750 nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, 
   751                                    nsScannerIterator& aEnd,
   752                                    int32_t& aNewlinesSkipped) {
   754   if (!mSlidingBuffer) {
   755     return kEOF;
   756   }
   758   char16_t theChar = 0;
   759   nsresult  result = Peek(theChar);
   761   if (NS_FAILED(result)) {
   762     return result;
   763   }
   765   nsScannerIterator origin, current, end;
   766   bool done = false;  
   768   origin = mCurrentPosition;
   769   current = origin;
   770   end = mEndPosition;
   772   while(!done && current != end) {
   773     switch(theChar) {
   774       case '\n':
   775       case '\r': ++aNewlinesSkipped;
   776       case ' ' :
   777       case '\t':
   778         {
   779           char16_t thePrevChar = theChar;
   780           theChar = (++current != end) ? *current : '\0';
   781           if ((thePrevChar == '\r' && theChar == '\n') ||
   782               (thePrevChar == '\n' && theChar == '\r')) {
   783             theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
   784           }
   785         }
   786         break;
   787       default:
   788         done = true;
   789         aStart = origin;
   790         aEnd = current;
   791         break;
   792     }
   793   }
   795   SetPosition(current);
   796   if (current == end) {
   797     aStart = origin;
   798     aEnd = current;
   799     result = kEOF;
   800   }
   802   return result;
   803 }
   805 /**
   806  *  Consume characters until you encounter one contained in given
   807  *  input set.
   808  *  
   809  *  @update  gess 3/25/98
   810  *  @param   aString will contain the result of this method
   811  *  @param   aTerminalSet is an ordered string that contains
   812  *           the set of INVALID characters
   813  *  @return  error code
   814  */
   815 nsresult nsScanner::ReadUntil(nsAString& aString,
   816                               const nsReadEndCondition& aEndCondition,
   817                               bool addTerminal)
   818 {  
   819   if (!mSlidingBuffer) {
   820     return kEOF;
   821   }
   823   nsScannerIterator origin, current;
   824   const char16_t* setstart = aEndCondition.mChars;
   825   const char16_t* setcurrent;
   827   origin = mCurrentPosition;
   828   current = origin;
   830   char16_t         theChar=0;
   831   nsresult          result=Peek(theChar);
   833   if (NS_FAILED(result)) {
   834     return result;
   835   }
   837   while (current != mEndPosition) {
   838     theChar = *current;
   839     if (theChar == '\0') {
   840       ReplaceCharacter(current, sInvalid);
   841       theChar = sInvalid;
   842     }
   844     // Filter out completely wrong characters
   845     // Check if all bits are in the required area
   846     if(!(theChar & aEndCondition.mFilter)) {
   847       // They were. Do a thorough check.
   849       setcurrent = setstart;
   850       while (*setcurrent) {
   851         if (*setcurrent == theChar) {
   852           if(addTerminal)
   853             ++current;
   854           AppendUnicodeTo(origin, current, aString);
   855           SetPosition(current);
   857           //DoErrTest(aString);
   859           return NS_OK;
   860         }
   861         ++setcurrent;
   862       }
   863     }
   865     ++current;
   866   }
   868   // If we are here, we didn't find any terminator in the string and
   869   // current = mEndPosition
   870   SetPosition(current);
   871   AppendUnicodeTo(origin, current, aString);
   872   return kEOF;
   873 }
   875 nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,
   876                               const nsReadEndCondition& aEndCondition,
   877                               bool addTerminal)
   878 {  
   879   if (!mSlidingBuffer) {
   880     return kEOF;
   881   }
   883   nsScannerIterator origin, current;
   884   const char16_t* setstart = aEndCondition.mChars;
   885   const char16_t* setcurrent;
   887   origin = mCurrentPosition;
   888   current = origin;
   890   char16_t         theChar=0;
   891   nsresult          result=Peek(theChar);
   893   if (NS_FAILED(result)) {
   894     return result;
   895   }
   897   while (current != mEndPosition) {
   898     theChar = *current;
   899     if (theChar == '\0') {
   900       ReplaceCharacter(current, sInvalid);
   901       theChar = sInvalid;
   902     }
   904     // Filter out completely wrong characters
   905     // Check if all bits are in the required area
   906     if(!(theChar & aEndCondition.mFilter)) {
   907       // They were. Do a thorough check.
   909       setcurrent = setstart;
   910       while (*setcurrent) {
   911         if (*setcurrent == theChar) {
   912           if(addTerminal)
   913             ++current;
   914           AppendUnicodeTo(origin, current, aString);
   915           SetPosition(current);
   917           //DoErrTest(aString);
   919           return NS_OK;
   920         }
   921         ++setcurrent;
   922       }
   923     }
   925     ++current;
   926   }
   928   // If we are here, we didn't find any terminator in the string and
   929   // current = mEndPosition
   930   SetPosition(current);
   931   AppendUnicodeTo(origin, current, aString);
   932   return kEOF;
   933 }
   935 nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, 
   936                               nsScannerIterator& aEnd,
   937                               const nsReadEndCondition &aEndCondition,
   938                               bool addTerminal)
   939 {
   940   if (!mSlidingBuffer) {
   941     return kEOF;
   942   }
   944   nsScannerIterator origin, current;
   945   const char16_t* setstart = aEndCondition.mChars;
   946   const char16_t* setcurrent;
   948   origin = mCurrentPosition;
   949   current = origin;
   951   char16_t         theChar=0;
   952   nsresult          result=Peek(theChar);
   954   if (NS_FAILED(result)) {
   955     aStart = aEnd = current;
   956     return result;
   957   }
   959   while (current != mEndPosition) {
   960     theChar = *current;
   961     if (theChar == '\0') {
   962       ReplaceCharacter(current, sInvalid);
   963       theChar = sInvalid;
   964     }
   966     // Filter out completely wrong characters
   967     // Check if all bits are in the required area
   968     if(!(theChar & aEndCondition.mFilter)) {
   969       // They were. Do a thorough check.
   970       setcurrent = setstart;
   971       while (*setcurrent) {
   972         if (*setcurrent == theChar) {
   973           if(addTerminal)
   974             ++current;
   975           aStart = origin;
   976           aEnd = current;
   977           SetPosition(current);
   979           return NS_OK;
   980         }
   981         ++setcurrent;
   982       }
   983     }
   985     ++current;
   986   }
   988   // If we are here, we didn't find any terminator in the string and
   989   // current = mEndPosition
   990   SetPosition(current);
   991   aStart = origin;
   992   aEnd = current;
   993   return kEOF;
   994 }
   996 /**
   997  *  Consumes chars until you see the given terminalChar
   998  *  
   999  *  @update  gess 3/25/98
  1000  *  @param   
  1001  *  @return  error code
  1002  */
  1003 nsresult nsScanner::ReadUntil(nsAString& aString,
  1004                               char16_t aTerminalChar,
  1005                               bool addTerminal)
  1007   if (!mSlidingBuffer) {
  1008     return kEOF;
  1011   nsScannerIterator origin, current;
  1013   origin = mCurrentPosition;
  1014   current = origin;
  1016   char16_t theChar;
  1017   nsresult result = Peek(theChar);
  1019   if (NS_FAILED(result)) {
  1020     return result;
  1023   while (current != mEndPosition) {
  1024     theChar = *current;
  1025     if (theChar == '\0') {
  1026       ReplaceCharacter(current, sInvalid);
  1027       theChar = sInvalid;
  1030     if (aTerminalChar == theChar) {
  1031       if(addTerminal)
  1032         ++current;
  1033       AppendUnicodeTo(origin, current, aString);
  1034       SetPosition(current);
  1035       return NS_OK;
  1037     ++current;
  1040   // If we are here, we didn't find any terminator in the string and
  1041   // current = mEndPosition
  1042   AppendUnicodeTo(origin, current, aString);
  1043   SetPosition(current);
  1044   return kEOF;
  1048 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
  1050   aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
  1053 void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
  1055   aPosition = mCurrentPosition;
  1058 void nsScanner::EndReading(nsScannerIterator& aPosition)
  1060   aPosition = mEndPosition;
  1063 void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse)
  1065   if (mSlidingBuffer) {
  1066 #ifdef DEBUG
  1067     uint32_t origRemaining = mCountRemaining;
  1068 #endif
  1070     if (aReverse) {
  1071       mCountRemaining += (Distance(aPosition, mCurrentPosition));
  1073     else {
  1074       mCountRemaining -= (Distance(mCurrentPosition, aPosition));
  1077     NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) ||
  1078                  (mCountRemaining <= origRemaining && !aReverse),
  1079                  "Improper use of nsScanner::SetPosition. Make sure to set the"
  1080                  " aReverse parameter correctly");
  1082     mCurrentPosition = aPosition;
  1083     if (aTerminate && (mCurrentPosition == mEndPosition)) {
  1084       mMarkPosition = mCurrentPosition;
  1085       mSlidingBuffer->DiscardPrefix(mCurrentPosition);
  1090 void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
  1091                                  char16_t aChar)
  1093   if (mSlidingBuffer) {
  1094     mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
  1098 bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
  1099                                  nsIRequest *aRequest,
  1100                                  int32_t aErrorPos)
  1102   uint32_t countRemaining = mCountRemaining;
  1103   if (!mSlidingBuffer) {
  1104     mSlidingBuffer = new nsScannerString(aBuf);
  1105     if (!mSlidingBuffer)
  1106       return false;
  1107     mSlidingBuffer->BeginReading(mCurrentPosition);
  1108     mMarkPosition = mCurrentPosition;
  1109     mSlidingBuffer->EndReading(mEndPosition);
  1110     mCountRemaining = aBuf->DataLength();
  1112   else {
  1113     mSlidingBuffer->AppendBuffer(aBuf);
  1114     if (mCurrentPosition == mEndPosition) {
  1115       mSlidingBuffer->BeginReading(mCurrentPosition);
  1117     mSlidingBuffer->EndReading(mEndPosition);
  1118     mCountRemaining += aBuf->DataLength();
  1121   if (aErrorPos != -1 && !mHasInvalidCharacter) {
  1122     mHasInvalidCharacter = true;
  1123     mFirstInvalidPosition = mCurrentPosition;
  1124     mFirstInvalidPosition.advance(countRemaining + aErrorPos);
  1127   if (mFirstNonWhitespacePosition == -1) {
  1128     nsScannerIterator iter(mCurrentPosition);
  1129     nsScannerIterator end(mEndPosition);
  1131     while (iter != end) {
  1132       if (!nsCRT::IsAsciiSpace(*iter)) {
  1133         mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
  1135         break;
  1138       ++iter;
  1141   return true;
  1144 /**
  1145  *  call this to copy bytes out of the scanner that have not yet been consumed
  1146  *  by the tokenization process.
  1148  *  @update  gess 5/12/98
  1149  *  @param   aCopyBuffer is where the scanner buffer will be copied to
  1150  *  @return  nada
  1151  */
  1152 void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
  1153   if (!mSlidingBuffer) {
  1154     aCopyBuffer.Truncate();
  1155     return;
  1158   nsScannerIterator start, end;
  1159   start = mCurrentPosition;
  1160   end = mEndPosition;
  1162   CopyUnicodeTo(start, end, aCopyBuffer);
  1165 /**
  1166  *  Retrieve the name of the file that the scanner is reading from.
  1167  *  In some cases, it's just a given name, because the scanner isn't
  1168  *  really reading from a file.
  1170  *  @update  gess 5/12/98
  1171  *  @return  
  1172  */
  1173 nsString& nsScanner::GetFilename(void) {
  1174   return mFilename;
  1177 /**
  1178  *  Conduct self test. Actually, selftesting for this class
  1179  *  occurs in the parser selftest.
  1181  *  @update  gess 3/25/98
  1182  *  @param   
  1183  *  @return  
  1184  */
  1186 void nsScanner::SelfTest(void) {
  1187 #ifdef _DEBUG
  1188 #endif
  1191 void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter)
  1193   mReplacementCharacter = aReplacementCharacter;
  1195   if (mHasInvalidCharacter) {
  1196     ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);

mercurial