diff -r 000000000000 -r 6474c204b198 parser/htmlparser/src/nsScanner.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parser/htmlparser/src/nsScanner.cpp	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,1199 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=78: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//#define __INCREMENTAL 1
+
+#include "mozilla/DebugOnly.h"
+
+#include "nsScanner.h"
+#include "nsDebug.h"
+#include "nsReadableUtils.h"
+#include "nsIInputStream.h"
+#include "nsIFile.h"
+#include "nsNetUtil.h"
+#include "nsUTF8Utils.h" // for LossyConvertEncoding
+#include "nsCRT.h"
+#include "nsParser.h"
+#include "nsCharsetSource.h"
+
+#include "mozilla/dom/EncodingUtils.h"
+
+using mozilla::dom::EncodingUtils;
+
+// NUL characters found in the input are replaced with this character
+// (UCS2_REPLACEMENT_CHAR).  It is never reassigned, so it is declared const.
+static const char16_t sInvalid = UCS2_REPLACEMENT_CHAR;
+
+nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) :
+  mChars(aTerminateChars), mFilter(char16_t(~0)) // Start with every bit set
+{
+  // mFilter ends up holding exactly those bits that are clear in every
+  // terminal character.  A character that shares any bit with mFilter can
+  // therefore never be a terminator, so readers can reject it with a single
+  // AND before doing the exact comparison against mChars.  This works well
+  // because terminal chars often have only the last 4-6 bits set while
+  // ordinary letters have higher bits set.
+  for (const char16_t* cursor = aTerminateChars; *cursor; ++cursor) {
+    mFilter &= ~(*cursor);
+  }
+}
+
+/**
+ *  Construct a scanner whose input is the single string handed in here.
+ *  This short cut was added for Javascript.
+ *
+ *  @update  gess 5/12/98
+ *  @param   anHTMLString the text to scan; it is appended to the scanner's
+ *           buffer during construction
+ */
+nsScanner::nsScanner(const nsAString& anHTMLString)
+{
+  MOZ_COUNT_CTOR(nsScanner);
+
+  // These must be set before AppendToBuffer() runs, since it reads them.
+  mSlidingBuffer = nullptr;
+  mCountRemaining = 0;
+  mFirstNonWhitespacePosition = -1;
+
+  if (!AppendToBuffer(anHTMLString)) {
+    /* XXX see hack in the other constructor, re: bug 182067 */
+    memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
+    mEndPosition = mCurrentPosition;
+  } else {
+    mSlidingBuffer->BeginReading(mCurrentPosition);
+  }
+  mMarkPosition = mCurrentPosition;
+
+  mIncremental = false;
+  mUnicodeDecoder = nullptr;
+  mCharsetSource = kCharsetUninitialized;
+  mHasInvalidCharacter = false;
+  mReplacementCharacter = char16_t(0);
+}
+
+/**
+ *  Construct a scanner that is fed incrementally through Append().
+ *  If you pass a null filename, you can still provide data to the
+ *  scanner via append.
+ *
+ *  @param   aFilename name recorded for this scanner (see GetFilename)
+ *  @param   aCreateStream asserted to be false
+ */
+nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
+  : mFilename(aFilename)
+{
+  MOZ_COUNT_CTOR(nsScanner);
+  NS_ASSERTION(!aCreateStream, "This is always true.");
+
+  mSlidingBuffer = nullptr;
+  mCountRemaining = 0;
+  mFirstNonWhitespacePosition = -1;
+  mIncremental = true;
+
+  // XXX This is a big hack.  The iterators have to be initialized to
+  // something, and what matters is that mCurrentPosition == mEndPosition so
+  // that our methods believe we are at EOF (see bug 182067).
+  // mCurrentPosition is zeroed so that there is some hope of catching null
+  // pointer dereferences associated with this hack. --darin
+  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
+  mEndPosition = mCurrentPosition;
+  mMarkPosition = mCurrentPosition;
+
+  mUnicodeDecoder = nullptr;
+  mCharsetSource = kCharsetUninitialized;
+  mHasInvalidCharacter = false;
+  mReplacementCharacter = char16_t(0);
+
+  // XML defaults to UTF-8 and about:blank is UTF-8, too.
+  SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
+}
+
+/**
+ *  Select the character encoding used to decode bytes passed to Append().
+ *
+ *  @param   aCharset encoding label; resolved to an encoding name through
+ *           EncodingUtils::FindEncodingForLabel
+ *  @param   aSource priority of this request; requests with a lower
+ *           priority than the currently recorded source are ignored
+ *  @return  NS_OK when the request was ignored, was a no-op or succeeded;
+ *           NS_ERROR_FAILURE when aCharset is not a known label, in which
+ *           case the scanner's charset state is left untouched
+ */
+nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)
+{
+  if (aSource < mCharsetSource) // priority is lower than the current one
+    return NS_OK;
+
+  // Validate the label before touching any state.  Callers are not supposed
+  // to pass a bogus label, but if one slips through in a release build we
+  // must not go on to create a decoder for an empty encoding name.
+  nsCString charsetName;
+  if (!EncodingUtils::FindEncodingForLabel(aCharset, charsetName)) {
+    MOZ_ASSERT(false, "Should never call with a bogus aCharset.");
+    return NS_ERROR_FAILURE;
+  }
+
+  mCharsetSource = aSource;
+
+  if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
+    return NS_OK; // no difference, don't change it
+  }
+
+  // different, need to change it
+
+  mCharset.Assign(charsetName);
+
+  mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
+  // Have the decoder report malformed input as an error so that Append()
+  // can substitute its own replacement character.
+  mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+
+  return NS_OK;
+}
+
+
+/**
+ *  Destructor: frees the sliding buffer owned by this scanner.
+ *
+ *  @update  gess 3/25/98
+ */
+nsScanner::~nsScanner()
+{
+  // Deleting a null pointer is harmless, so no check is needed.
+  delete mSlidingBuffer;
+
+  MOZ_COUNT_DTOR(nsScanner);
+}
+
+/**
+ *  Move the current position back to the marked position, so the input
+ *  between the mark and the current position is read again.  Used when
+ *  tokenization gets interrupted.  Does nothing when there is no buffer.
+ *  NOTE: IT IS REALLY BAD FORM TO CALL THIS WITHOUT CALLING MARK FIRST!
+ *
+ *  @update  gess 5/12/98
+ */
+void nsScanner::RewindToMark(void)
+{
+  if (!mSlidingBuffer) {
+    return;
+  }
+
+  // Everything between the mark and the current position becomes unread.
+  mCountRemaining += Distance(mMarkPosition, mCurrentPosition);
+  mCurrentPosition = mMarkPosition;
+}
+
+
+/**
+ *  Record the current position as the mark and discard everything in the
+ *  buffer that precedes it.  RewindToMark() returns to this point.
+ *
+ *  @update  gess 7/29/98
+ *  @return  the number of characters between the old start of the buffer
+ *           and the current position, or 0 when there is no buffer
+ */
+int32_t nsScanner::Mark()
+{
+  if (!mSlidingBuffer) {
+    return 0;
+  }
+
+  nsScannerIterator bufferStart;
+  mSlidingBuffer->BeginReading(bufferStart);
+  int32_t consumed = Distance(bufferStart, mCurrentPosition);
+
+  // Drop the consumed prefix, then re-read the begin iterator so that both
+  // the current position and the mark point at the new buffer start.
+  mSlidingBuffer->DiscardPrefix(mCurrentPosition);
+  mSlidingBuffer->BeginReading(mCurrentPosition);
+  mMarkPosition = mCurrentPosition;
+
+  return consumed;
+}
+
+/**
+ * Insert data into the underlying input buffer at the current position,
+ * as if it were read from an input stream.
+ *
+ * @update  harishd 01/12/99
+ * @param   aBuffer the text to insert
+ * @return  false if there is no buffer to insert into, true otherwise
+ */
+bool nsScanner::UngetReadable(const nsAString& aBuffer)
+{
+  if (!mSlidingBuffer) {
+    return false;
+  }
+
+  mSlidingBuffer->UngetReadable(aBuffer, mCurrentPosition);
+
+  // Insertion invalidated our iterators, so fetch fresh ones.
+  mSlidingBuffer->BeginReading(mCurrentPosition);
+  mSlidingBuffer->EndReading(mEndPosition);
+
+  mCountRemaining += aBuffer.Length(); // Ref. bug 117441
+  return true;
+}
+
+/**
+ * Append already-decoded text to the underlying input buffer, as if it
+ * were read from an input stream.
+ *
+ * @update  gess4/3/98
+ * @param   aBuffer the text to append
+ * @return  NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the text could not
+ *          be added to the buffer
+ */
+nsresult nsScanner::Append(const nsAString& aBuffer)
+{
+  return AppendToBuffer(aBuffer) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
+}
+
+/**
+ *  Decode raw bytes with the current unicode decoder and append the
+ *  resulting text to the underlying input buffer.
+ *
+ *  Whenever the decoder reports a failure, one replacement character is
+ *  written to the output (mReplacementCharacter if it has been overridden,
+ *  otherwise the decoder's GetCharacterForUnMapped()), the decoder is
+ *  reset, one extra source byte is skipped and the conversion is retried
+ *  on the rest of the input.
+ *
+ *  @update  gess 5/21/98
+ *  @param   aBuffer the bytes to decode
+ *  @param   aLen number of bytes in aBuffer
+ *  @param   aRequest passed through to AppendToBuffer
+ *  @return  NS_ERROR_FAILURE if no decoder is set, NS_ERROR_OUT_OF_MEMORY
+ *           if the output buffer cannot be allocated or appended, NS_OK
+ *           otherwise (decoder errors are deliberately not propagated)
+ */
+nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen,
+                           nsIRequest *aRequest)
+{
+  nsresult res = NS_OK;
+  if (mUnicodeDecoder) {
+    int32_t unicharBufLen = 0;
+    mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
+    nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
+    NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
+    char16_t *unichars = buffer->DataStart(); // write cursor into the output buffer
+
+    int32_t totalChars = 0; // characters written to the buffer so far
+    int32_t unicharLength = unicharBufLen; // in: space left; out: chars produced by Convert
+    int32_t errorPos = -1; // index of the first replaced character, -1 if none
+
+    do {
+      int32_t srcLength = aLen; // presumably updated by Convert to the bytes consumed -- TODO confirm
+      res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
+
+      totalChars += unicharLength;
+      // Continuation of failure case
+      if(NS_FAILED(res)) {
+        // if we failed, we consume one byte, replace it with the replacement
+        // character and try the conversion again.
+
+        // This is only needed because some decoders don't follow the
+        // nsIUnicodeDecoder contract: they return a failure when *aDestLength
+        // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177
+        if ((unichars + unicharLength) >= buffer->DataEnd()) {
+          NS_ERROR("Unexpected end of destination buffer");
+          break;
+        }
+
+        if (mReplacementCharacter == 0x0 && errorPos == -1) { // remember only the first error, and only while no override is set
+          errorPos = totalChars;
+        }
+        unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
+                                    mUnicodeDecoder->GetCharacterForUnMapped() :
+                                    mReplacementCharacter;
+
+        unichars = unichars + unicharLength; // move the write cursor past the converted chars and the replacement
+        unicharLength = unicharBufLen - (++totalChars); // space left, after counting the replacement char
+
+        mUnicodeDecoder->Reset();
+
+        if(((uint32_t) (srcLength + 1)) > aLen) { // never skip past the end of the input
+          srcLength = aLen;
+        }
+        else {
+          ++srcLength; // also skip the offending byte
+        }
+
+        aBuffer += srcLength;
+        aLen -= srcLength;
+      }
+    } while (NS_FAILED(res) && (aLen > 0));
+
+    buffer->SetDataLength(totalChars);
+    // Don't propagate return code of unicode decoder
+    // since it doesn't reflect on our success or failure
+    // - Ref. bug 87110
+    res = NS_OK; 
+    if (!AppendToBuffer(buffer, aRequest, errorPos))
+      res = NS_ERROR_OUT_OF_MEMORY;
+  }
+  else {
+    NS_WARNING("No decoder found.");
+    res = NS_ERROR_FAILURE;
+  }
+
+  return res;
+}
+
+/**
+ *  Read the character at the current position and advance past it.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aChar receives the character, or 0 at end of input
+ *  @return  kEOF if there is no buffer or nothing is left to read,
+ *           NS_OK otherwise
+ */
+nsresult nsScanner::GetChar(char16_t& aChar)
+{
+  const bool atEnd = !mSlidingBuffer || mCurrentPosition == mEndPosition;
+  if (atEnd) {
+    aChar = 0;
+    return kEOF;
+  }
+
+  aChar = *mCurrentPosition;
+  ++mCurrentPosition;
+  --mCountRemaining;
+
+  return NS_OK;
+}
+
+
+/**
+ *  Look at a character in the input buffer without consuming it.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aChar receives the character; set to 0 when kEOF is returned
+ *  @param   aOffset how many characters past the current position to look
+ *  @return  kEOF if there is no buffer, nothing is left to read, or
+ *           aOffset reaches past the remaining input; NS_OK otherwise
+ */
+nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset)
+{
+  aChar = 0;
+
+  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
+    return kEOF;
+  }
+
+  if (aOffset == 0) {
+    aChar = *mCurrentPosition;
+    return NS_OK;
+  }
+
+  if (mCountRemaining <= aOffset) {
+    return kEOF;
+  }
+
+  nsScannerIterator target = mCurrentPosition;
+  target.advance(aOffset);
+  aChar = *target;
+
+  return NS_OK;
+}
+
+/**
+ *  Copy up to aNumChars characters out of the input buffer without
+ *  consuming them.
+ *
+ *  @param   aStr receives the copied characters
+ *  @param   aNumChars how many characters to copy; fewer are copied when
+ *           the input ends first
+ *  @param   aOffset how many characters past the current position to start
+ *  @return  kEOF if there is no buffer, nothing is left to read, or
+ *           aOffset reaches past the remaining input; NS_OK otherwise
+ */
+nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
+{
+  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
+    return kEOF;
+  }
+
+  if ((int32_t)mCountRemaining <= aOffset) {
+    return kEOF;
+  }
+
+  nsScannerIterator rangeStart = mCurrentPosition;
+  if (aOffset > 0) {
+    rangeStart.advance(aOffset);
+  }
+
+  // Clamp the range to the end of the buffer when fewer than aNumChars
+  // characters are available after the offset.
+  nsScannerIterator rangeEnd;
+  if (mCountRemaining >= uint32_t(aNumChars + aOffset)) {
+    rangeEnd = rangeStart;
+    rangeEnd.advance(aNumChars);
+  }
+  else {
+    rangeEnd = mEndPosition;
+  }
+
+  CopyUnicodeTo(rangeStart, rangeEnd, aStr);
+
+  return NS_OK;
+}
+
+
+/**
+ *  Advance past any run of whitespace (space, tab, CR, LF) that starts at
+ *  the current position.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aNewlinesSkipped incremented once per line break skipped; a
+ *           CRLF or LFCR pair counts as a single line break
+ *  @return  kEOF if there is no buffer, nothing is left to read, or the
+ *           skipped whitespace ran to the end of the buffer; otherwise
+ *           the result of the initial Peek
+ */
+nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    return result;
+  }
+
+  nsScannerIterator cursor = mCurrentPosition;
+  bool skippedAny = false;
+
+  while (cursor != mEndPosition) {
+    const bool isNewline = (theChar == '\n' || theChar == '\r');
+    if (!isNewline && theChar != ' ' && theChar != '\t') {
+      break; // first non-whitespace character
+    }
+
+    if (isNewline) {
+      ++aNewlinesSkipped;
+    }
+    skippedAny = true;
+
+    const char16_t previous = theChar;
+    ++cursor;
+    theChar = (cursor != mEndPosition) ? *cursor : char16_t(0);
+
+    if ((previous == '\r' && theChar == '\n') ||
+        (previous == '\n' && theChar == '\r')) {
+      // CRLF == LFCR => LF: swallow the second half of the pair too.
+      ++cursor;
+      theChar = (cursor != mEndPosition) ? *cursor : char16_t(0);
+    }
+  }
+
+  if (skippedAny) {
+    SetPosition(cursor);
+    if (cursor == mEndPosition) {
+      result = kEOF;
+    }
+  }
+
+  return result;
+}
+
+/**
+ *  Consume characters for as long as they are equal to the given char.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aSkipChar the character to skip over
+ *  @return  kEOF if there is no buffer; NS_OK when a different character
+ *           was reached; otherwise the failure reported by Peek
+ */
+nsresult nsScanner::SkipOver(char16_t aSkipChar)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  char16_t next = 0;
+  nsresult rv = Peek(next);
+  while (NS_OK == rv && next == aSkipChar) {
+    GetChar(next);
+    rv = Peek(next);
+  }
+
+  return rv;
+}
+
+#if 0 // Compiled out: leftover debugging helpers; their only call sites in this file are commented out.
+void DoErrTest(nsString& aString) {
+  int32_t pos=aString.FindChar(0); // position of the first NUL in the string, if any
+  if(kNotFound<pos) {
+    if(aString.Length()-1!=pos) { // a NUL somewhere other than the last character; body intentionally empty
+    }
+  }
+}
+
+void DoErrTest(nsCString& aString) {
+  int32_t pos=aString.FindChar(0); // same check as above, for narrow strings
+  if(kNotFound<pos) {
+    if(aString.Length()-1!=pos) { // body intentionally empty
+    }
+  }
+}
+#endif // 0
+
+/**
+ *  Consume characters until reaching whitespace (LF, CR, space, tab,
+ *  vertical tab, form feed), a '<', a '>', or a '/'.  NUL characters met
+ *  on the way are replaced in the buffer with the replacement character.
+ *
+ *  @param   aString - receives the characters consumed
+ *  @return  kEOF if there is no buffer or the end of the buffer was
+ *           reached; otherwise the result of the initial Peek
+ */
+nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+
+  nsScannerIterator cursor = mCurrentPosition;
+  nsScannerIterator end = mEndPosition;
+
+  // Walk forward until a terminating character is found.  Everything
+  // before it is appended in one go afterwards.
+  for (; cursor != end; ++cursor) {
+    theChar = *cursor;
+
+    const bool isTerminator =
+      theChar == '\n' || theChar == '\r' || theChar == ' '  ||
+      theChar == '\t' || theChar == '\v' || theChar == '\f' ||
+      theChar == '<'  || theChar == '>'  || theChar == '/';
+    if (isTerminator) {
+      break;
+    }
+
+    if (theChar == '\0') {
+      ReplaceCharacter(cursor, sInvalid);
+    }
+  }
+
+  // Don't bother appending nothing.
+  if (cursor != mCurrentPosition) {
+    AppendUnicodeTo(mCurrentPosition, cursor, aString);
+  }
+
+  SetPosition(cursor);
+  if (cursor == end) {
+    result = kEOF;
+  }
+
+  //DoErrTest(aString);
+
+  return result;
+}
+
+/**
+ *  Consume characters until reaching one that is not valid in an entity
+ *  name.  Valid characters are ASCII letters, ASCII digits, '_', '-' and
+ *  '.'.  NUL characters do not end the name and are consumed as well.
+ *
+ *  @param   aString - receives the characters consumed
+ *  @return  kEOF if there is no buffer or the end of the buffer was
+ *           reached; otherwise the result of the initial Peek
+ */
+nsresult nsScanner::ReadEntityIdentifier(nsString& aString)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+  nsScannerIterator end = mEndPosition;
+
+  for (; cursor != end; ++cursor) {
+    theChar = *cursor;
+    if (!theChar) {
+      continue; // NULs are passed over
+    }
+
+    // Don't allow ':' in entity names.  See bug 23791
+    const bool isNameChar =
+      theChar == '_' || theChar == '-' || theChar == '.' ||
+      ('a' <= theChar && theChar <= 'z') ||
+      ('A' <= theChar && theChar <= 'Z') ||
+      ('0' <= theChar && theChar <= '9');
+    if (!isNameChar) {
+      break;
+    }
+  }
+
+  if (cursor != end) {
+    // Stopped on an invalid character: hand back what precedes it.
+    AppendUnicodeTo(origin, cursor, aString);
+    SetPosition(cursor);
+
+    //DoErrTest(aString);
+
+    return result;
+  }
+
+  // Ran out of input: hand back everything that was scanned.
+  SetPosition(cursor);
+  AppendUnicodeTo(origin, cursor, aString);
+  return kEOF;
+}
+
+/**
+ *  Consume digits.  NUL characters do not end the number and are
+ *  consumed as well.
+ *
+ *  @param   aString - receives the characters consumed
+ *  @param   aBase - 10 for decimal digits only, 16 to also accept the
+ *           letters A-F and a-f
+ *  @return  kEOF if there is no buffer or the end of the buffer was
+ *           reached; otherwise the result of the initial Peek
+ */
+nsresult nsScanner::ReadNumber(nsString& aString,int32_t aBase)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+  nsScannerIterator end = mEndPosition;
+
+  for (; cursor != end; ++cursor) {
+    theChar = *cursor;
+    if (!theChar) {
+      continue; // NULs are passed over
+    }
+
+    const bool isDecimalDigit = ('0' <= theChar && theChar <= '9');
+    const bool isHexLetter = (aBase == 16) &&
+                             (('A' <= theChar && theChar <= 'F') ||
+                              ('a' <= theChar && theChar <= 'f'));
+    if (!isDecimalDigit && !isHexLetter) {
+      break;
+    }
+  }
+
+  if (cursor != end) {
+    // Stopped on a non-digit: hand back what precedes it.
+    AppendUnicodeTo(origin, cursor, aString);
+    SetPosition(cursor);
+
+    //DoErrTest(aString);
+
+    return result;
+  }
+
+  // Ran out of input: hand back everything that was scanned.
+  SetPosition(cursor);
+  AppendUnicodeTo(origin, cursor, aString);
+  return kEOF;
+}
+
+/**
+ *  Consume a run of whitespace (space, tab, CR, LF) and append it to
+ *  aString.  A lone CR is appended as CR followed by an extra LF; callers
+ *  should know to remove the extra CRs.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aString receives the whitespace consumed
+ *  @param   aNewlinesSkipped incremented once per line break; a CRLF or
+ *           LFCR pair counts as a single line break
+ *  @param   aHaveCR set to true if a CRLF/LFCR pair or a lone CR was seen,
+ *           false otherwise
+ *  @return  kEOF if there is no buffer, nothing is left to read, or the
+ *           whitespace ran to the end of the buffer; otherwise the result
+ *           of the initial Peek
+ */
+nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,
+                                   int32_t& aNewlinesSkipped,
+                                   bool& aHaveCR)
+{
+  aHaveCR = false;
+
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    return result;
+  }
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+  nsScannerIterator end = mEndPosition;
+  bool sawCR = false;
+
+  while (cursor != end) {
+    if (theChar == '\n' || theChar == '\r') {
+      ++aNewlinesSkipped;
+
+      const char16_t previous = theChar;
+      ++cursor;
+      theChar = (cursor != end) ? *cursor : char16_t(0);
+
+      if ((previous == '\r' && theChar == '\n') ||
+          (previous == '\n' && theChar == '\r')) {
+        // CRLF == LFCR => LF: swallow the second half of the pair too.
+        ++cursor;
+        theChar = (cursor != end) ? *cursor : char16_t(0);
+        sawCR = true;
+      }
+      else if (previous == '\r') {
+        // Lone CR becomes CRLF: flush what we have, including the CR,
+        // add the LF, and start the next pending range after it.
+        AppendUnicodeTo(origin, cursor, aString);
+        aString.writable().Append(char16_t('\n'));
+        origin = cursor;
+        sawCR = true;
+      }
+    }
+    else if (theChar == ' ' || theChar == '\t') {
+      ++cursor;
+      theChar = (cursor != end) ? *cursor : char16_t(0);
+    }
+    else {
+      // First non-whitespace character: flush the pending range and stop.
+      AppendUnicodeTo(origin, cursor, aString);
+      break;
+    }
+  }
+
+  SetPosition(cursor);
+  if (cursor == end) {
+    AppendUnicodeTo(origin, cursor, aString);
+    result = kEOF;
+  }
+
+  aHaveCR = sawCR;
+  return result;
+}
+
+/**
+ *  Consume a run of whitespace (space, tab, CR, LF) and report it as a
+ *  pair of iterators rather than copying it.
+ *
+ *  XXXbz callers of this have to manage their lone '\r' themselves if they
+ *  want it to work.  Good thing they're all in view-source and it deals.
+ *
+ *  @param   aStart receives the start of the whitespace run
+ *  @param   aEnd receives the end of the whitespace run
+ *  @param   aNewlinesSkipped incremented once per line break; a CRLF or
+ *           LFCR pair counts as a single line break
+ *  @return  kEOF if there is no buffer, nothing is left to read, or the
+ *           whitespace ran to the end of the buffer; otherwise the result
+ *           of the initial Peek
+ */
+nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, 
+                                   nsScannerIterator& aEnd,
+                                   int32_t& aNewlinesSkipped)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    return result;
+  }
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+  nsScannerIterator end = mEndPosition;
+
+  while (cursor != end) {
+    const bool isNewline = (theChar == '\n' || theChar == '\r');
+    if (!isNewline && theChar != ' ' && theChar != '\t') {
+      // First non-whitespace character: report the run and stop.
+      aStart = origin;
+      aEnd = cursor;
+      break;
+    }
+
+    if (isNewline) {
+      ++aNewlinesSkipped;
+    }
+
+    const char16_t previous = theChar;
+    ++cursor;
+    theChar = (cursor != end) ? *cursor : char16_t(0);
+
+    if ((previous == '\r' && theChar == '\n') ||
+        (previous == '\n' && theChar == '\r')) {
+      // CRLF == LFCR => LF: swallow the second half of the pair too.
+      ++cursor;
+      theChar = (cursor != end) ? *cursor : char16_t(0);
+    }
+  }
+
+  SetPosition(cursor);
+  if (cursor == end) {
+    aStart = origin;
+    aEnd = cursor;
+    result = kEOF;
+  }
+
+  return result;
+}
+
+/**
+ *  Consume characters until reaching one contained in the given set of
+ *  terminators.  NUL characters met on the way are replaced in the buffer
+ *  with the replacement character.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aString receives the characters consumed
+ *  @param   aEndCondition holds the terminating characters and the bit
+ *           filter used to reject most non-terminators quickly
+ *  @param   addTerminal whether the terminator itself is consumed and
+ *           included in aString
+ *  @return  NS_OK if a terminator was found; kEOF if there is no buffer,
+ *           nothing is left to read, or the input ended first
+ */
+nsresult nsScanner::ReadUntil(nsAString& aString,
+                              const nsReadEndCondition& aEndCondition,
+                              bool addTerminal)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    return result;
+  }
+
+  for (; cursor != mEndPosition; ++cursor) {
+    theChar = *cursor;
+    if (theChar == '\0') {
+      ReplaceCharacter(cursor, sInvalid);
+      theChar = sInvalid;
+    }
+
+    // Cheap pre-check: a character with any bit that no terminator has
+    // cannot be a terminator.
+    if ((theChar & aEndCondition.mFilter) != 0) {
+      continue;
+    }
+
+    // It passed the filter, so do a thorough check.
+    for (const char16_t* candidate = aEndCondition.mChars; *candidate;
+         ++candidate) {
+      if (*candidate != theChar) {
+        continue;
+      }
+
+      if (addTerminal) {
+        ++cursor;
+      }
+      AppendUnicodeTo(origin, cursor, aString);
+      SetPosition(cursor);
+
+      //DoErrTest(aString);
+
+      return NS_OK;
+    }
+  }
+
+  // No terminator was found and cursor == mEndPosition.
+  SetPosition(cursor);
+  AppendUnicodeTo(origin, cursor, aString);
+  return kEOF;
+}
+
+/**
+ *  Consume characters until reaching one contained in the given set of
+ *  terminators, appending them to a shared substring.  NUL characters met
+ *  on the way are replaced in the buffer with the replacement character.
+ *
+ *  @param   aString receives the characters consumed
+ *  @param   aEndCondition holds the terminating characters and the bit
+ *           filter used to reject most non-terminators quickly
+ *  @param   addTerminal whether the terminator itself is consumed and
+ *           included in aString
+ *  @return  NS_OK if a terminator was found; kEOF if there is no buffer,
+ *           nothing is left to read, or the input ended first
+ */
+nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,
+                              const nsReadEndCondition& aEndCondition,
+                              bool addTerminal)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    return result;
+  }
+
+  for (; cursor != mEndPosition; ++cursor) {
+    theChar = *cursor;
+    if (theChar == '\0') {
+      ReplaceCharacter(cursor, sInvalid);
+      theChar = sInvalid;
+    }
+
+    // Cheap pre-check: a character with any bit that no terminator has
+    // cannot be a terminator.
+    if ((theChar & aEndCondition.mFilter) != 0) {
+      continue;
+    }
+
+    // It passed the filter, so do a thorough check.
+    for (const char16_t* candidate = aEndCondition.mChars; *candidate;
+         ++candidate) {
+      if (*candidate != theChar) {
+        continue;
+      }
+
+      if (addTerminal) {
+        ++cursor;
+      }
+      AppendUnicodeTo(origin, cursor, aString);
+      SetPosition(cursor);
+
+      //DoErrTest(aString);
+
+      return NS_OK;
+    }
+  }
+
+  // No terminator was found and cursor == mEndPosition.
+  SetPosition(cursor);
+  AppendUnicodeTo(origin, cursor, aString);
+  return kEOF;
+}
+
+/**
+ *  Consume characters until reaching one contained in the given set of
+ *  terminators, reporting the consumed range as a pair of iterators
+ *  rather than copying it.  NUL characters met on the way are replaced in
+ *  the buffer with the replacement character.
+ *
+ *  @param   aStart receives the start of the consumed range
+ *  @param   aEnd receives the end of the consumed range
+ *  @param   aEndCondition holds the terminating characters and the bit
+ *           filter used to reject most non-terminators quickly
+ *  @param   addTerminal whether the terminator itself is consumed and
+ *           included in the range
+ *  @return  NS_OK if a terminator was found; kEOF if there is no buffer
+ *           (aStart and aEnd are then left untouched), nothing is left to
+ *           read, or the input ended first
+ */
+nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, 
+                              nsScannerIterator& aEnd,
+                              const nsReadEndCondition &aEndCondition,
+                              bool addTerminal)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+
+  char16_t theChar = 0;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    // Nothing to read: report an empty range at the current position.
+    aStart = aEnd = cursor;
+    return result;
+  }
+
+  for (; cursor != mEndPosition; ++cursor) {
+    theChar = *cursor;
+    if (theChar == '\0') {
+      ReplaceCharacter(cursor, sInvalid);
+      theChar = sInvalid;
+    }
+
+    // Cheap pre-check: a character with any bit that no terminator has
+    // cannot be a terminator.
+    if ((theChar & aEndCondition.mFilter) != 0) {
+      continue;
+    }
+
+    // It passed the filter, so do a thorough check.
+    for (const char16_t* candidate = aEndCondition.mChars; *candidate;
+         ++candidate) {
+      if (*candidate != theChar) {
+        continue;
+      }
+
+      if (addTerminal) {
+        ++cursor;
+      }
+      aStart = origin;
+      aEnd = cursor;
+      SetPosition(cursor);
+
+      return NS_OK;
+    }
+  }
+
+  // No terminator was found and cursor == mEndPosition.
+  SetPosition(cursor);
+  aStart = origin;
+  aEnd = cursor;
+  return kEOF;
+}
+
+/**
+ *  Consume characters until reaching the given terminal character.  NUL
+ *  characters met on the way are replaced in the buffer with the
+ *  replacement character.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aString receives the characters consumed
+ *  @param   aTerminalChar the character to stop at
+ *  @param   addTerminal whether the terminator itself is consumed and
+ *           included in aString
+ *  @return  NS_OK if the terminator was found; kEOF if there is no buffer,
+ *           nothing is left to read, or the input ended first
+ */
+nsresult nsScanner::ReadUntil(nsAString& aString,
+                              char16_t aTerminalChar,
+                              bool addTerminal)
+{
+  if (!mSlidingBuffer) {
+    return kEOF;
+  }
+
+  nsScannerIterator origin = mCurrentPosition;
+  nsScannerIterator cursor = origin;
+
+  char16_t theChar;
+  nsresult result = Peek(theChar);
+  if (NS_FAILED(result)) {
+    return result;
+  }
+
+  for (; cursor != mEndPosition; ++cursor) {
+    theChar = *cursor;
+    if (theChar == '\0') {
+      ReplaceCharacter(cursor, sInvalid);
+      theChar = sInvalid;
+    }
+
+    if (theChar != aTerminalChar) {
+      continue;
+    }
+
+    if (addTerminal) {
+      ++cursor;
+    }
+    AppendUnicodeTo(origin, cursor, aString);
+    SetPosition(cursor);
+    return NS_OK;
+  }
+
+  // No terminator was found and cursor == mEndPosition.
+  AppendUnicodeTo(origin, cursor, aString);
+  SetPosition(cursor);
+  return kEOF;
+}
+
+/**
+ *  Rebind aSubstring so that it refers to the range [aStart, aEnd) of the
+ *  scanner's sliding buffer.
+ *
+ *  @param   aSubstring the substring to rebind
+ *  @param   aStart start of the range
+ *  @param   aEnd end of the range
+ */
+void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
+{
+  // Like every other method here, cope with a scanner that has not been
+  // given any data yet: there is no buffer to bind to, so aSubstring is
+  // left as it was instead of dereferencing a null pointer.
+  if (!mSlidingBuffer) {
+    return;
+  }
+
+  aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
+}
+
+/**
+ *  Report the scanner's current read position.
+ *
+ *  @param   aPosition receives a copy of the current position
+ */
+void nsScanner::CurrentPosition(nsScannerIterator& aPosition) {
+  aPosition = mCurrentPosition;
+}
+
+/**
+ *  Report the end of the scanner's available input.
+ *
+ *  @param   aPosition receives a copy of the end position
+ */
+void nsScanner::EndReading(nsScannerIterator& aPosition) {
+  aPosition = mEndPosition;
+}
+ 
+/**
+ *  Move the current read position and keep the count of remaining
+ *  characters in step with it.  Does nothing when there is no buffer.
+ *
+ *  @param   aPosition the new current position
+ *  @param   aTerminate when true and the new position is the end of the
+ *           input, also move the mark there and discard the buffer prefix
+ *  @param   aReverse must be true when aPosition lies before the current
+ *           position, false when it lies at or after it
+ */
+void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse)
+{
+  if (!mSlidingBuffer) {
+    return;
+  }
+
+#ifdef DEBUG
+  uint32_t origRemaining = mCountRemaining;
+#endif
+
+  if (!aReverse) {
+    mCountRemaining -= (Distance(mCurrentPosition, aPosition));
+  }
+  else {
+    mCountRemaining += (Distance(aPosition, mCurrentPosition));
+  }
+
+  NS_ASSERTION(aReverse ? (mCountRemaining >= origRemaining)
+                        : (mCountRemaining <= origRemaining),
+               "Improper use of nsScanner::SetPosition. Make sure to set the"
+               " aReverse parameter correctly");
+
+  mCurrentPosition = aPosition;
+
+  const bool reachedEnd = (mCurrentPosition == mEndPosition);
+  if (aTerminate && reachedEnd) {
+    mMarkPosition = mCurrentPosition;
+    mSlidingBuffer->DiscardPrefix(mCurrentPosition);
+  }
+}
+
+/**
+ *  Overwrite the character at the given position in the sliding buffer.
+ *  Does nothing when there is no buffer.
+ *
+ *  @param   aPosition the position of the character to overwrite
+ *  @param   aChar the character to store there
+ */
+void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
+                                 char16_t aChar)
+{
+  if (!mSlidingBuffer) {
+    return;
+  }
+
+  mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
+}
+
+/**
+ *  Add a buffer of decoded text to the sliding buffer, creating the
+ *  sliding buffer on first use, and refresh the scanner's iterators and
+ *  remaining-character count.
+ *
+ *  @param   aBuf the buffer to add
+ *  @param   aRequest not used by this method
+ *  @param   aErrorPos index within aBuf of the first replaced (invalid)
+ *           character, or -1 if there is none
+ *  @return  false if the sliding buffer could not be created, true
+ *           otherwise
+ */
+bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
+                                 nsIRequest *aRequest,
+                                 int32_t aErrorPos)
+{
+  // Remember how much unread input there was before this buffer arrived;
+  // aErrorPos is relative to the start of aBuf.
+  uint32_t remainingBeforeAppend = mCountRemaining;
+
+  if (mSlidingBuffer) {
+    mSlidingBuffer->AppendBuffer(aBuf);
+    if (mCurrentPosition == mEndPosition) {
+      mSlidingBuffer->BeginReading(mCurrentPosition);
+    }
+    mSlidingBuffer->EndReading(mEndPosition);
+    mCountRemaining += aBuf->DataLength();
+  }
+  else {
+    mSlidingBuffer = new nsScannerString(aBuf);
+    if (!mSlidingBuffer)
+      return false;
+    mSlidingBuffer->BeginReading(mCurrentPosition);
+    mMarkPosition = mCurrentPosition;
+    mSlidingBuffer->EndReading(mEndPosition);
+    mCountRemaining = aBuf->DataLength();
+  }
+
+  // Record where the first invalid character sits, once only.
+  if (aErrorPos != -1 && !mHasInvalidCharacter) {
+    mHasInvalidCharacter = true;
+    mFirstInvalidPosition = mCurrentPosition;
+    mFirstInvalidPosition.advance(remainingBeforeAppend + aErrorPos);
+  }
+
+  // Until a non-whitespace character has been seen, look for the first
+  // one in the unread input and record its distance from the current
+  // position.
+  if (mFirstNonWhitespacePosition == -1) {
+    nsScannerIterator end(mEndPosition);
+
+    for (nsScannerIterator iter(mCurrentPosition); iter != end; ++iter) {
+      if (!nsCRT::IsAsciiSpace(*iter)) {
+        mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
+        break;
+      }
+    }
+  }
+
+  return true;
+}
+
+/**
+ *  Copy out the part of the scanner's input that has not yet been
+ *  consumed by the tokenization process.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aCopyBuffer receives the unconsumed input; it is truncated
+ *           when the scanner has no buffer
+ */
+void nsScanner::CopyUnusedData(nsString& aCopyBuffer)
+{
+  if (!mSlidingBuffer) {
+    aCopyBuffer.Truncate();
+    return;
+  }
+
+  nsScannerIterator unreadStart = mCurrentPosition;
+  nsScannerIterator unreadEnd = mEndPosition;
+
+  CopyUnicodeTo(unreadStart, unreadEnd, aCopyBuffer);
+}
+
+/**
+ *  Retrieve the name of the file that the scanner is reading from.
+ *  In some cases it is just a given name, because the scanner isn't
+ *  really reading from a file.
+ *
+ *  @update  gess 5/12/98
+ *  @return  a reference to the filename stored at construction
+ */
+nsString& nsScanner::GetFilename(void)
+{
+  return mFilename;
+}
+
+/**
+ *  Conduct self test.  This is a no-op: selftesting for this class
+ *  actually occurs in the parser selftest.
+ *
+ *  @update  gess 3/25/98
+ */
+void nsScanner::SelfTest(void)
+{
+  // Intentionally empty.
+}
+
+/**
+ *  Set the character that Append() substitutes for undecodable input from
+ *  now on.  If an invalid character has already been recorded, the
+ *  character at the first recorded position is overwritten as well.
+ *
+ *  @param   aReplacementCharacter the new replacement character
+ */
+void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter)
+{
+  mReplacementCharacter = aReplacementCharacter;
+
+  if (!mHasInvalidCharacter) {
+    return;
+  }
+
+  ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);
+}
+