diff -r 000000000000 -r 6474c204b198 parser/htmlparser/src/nsScanner.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parser/htmlparser/src/nsScanner.cpp Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,1199 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 sw=2 et tw=78: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//#define __INCREMENTAL 1 + +#include "mozilla/DebugOnly.h" + +#include "nsScanner.h" +#include "nsDebug.h" +#include "nsReadableUtils.h" +#include "nsIInputStream.h" +#include "nsIFile.h" +#include "nsNetUtil.h" +#include "nsUTF8Utils.h" // for LossyConvertEncoding +#include "nsCRT.h" +#include "nsParser.h" +#include "nsCharsetSource.h" + +#include "mozilla/dom/EncodingUtils.h" + +using mozilla::dom::EncodingUtils; + +// We replace NUL characters with this character. +static char16_t sInvalid = UCS2_REPLACEMENT_CHAR; + +nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) : + mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set +{ + // Build filter that will be used to filter out characters with + // bits that none of the terminal chars have. This works very well + // because terminal chars often have only the last 4-6 bits set and + // normal ascii letters have bit 7 set. Other letters have even higher + // bits set. + + // Calculate filter + const char16_t *current = aTerminateChars; + char16_t terminalChar = *current; + while (terminalChar) { + mFilter &= ~terminalChar; + ++current; + terminalChar = *current; + } +} + +/** + * Use this constructor if you want i/o to be based on + * a single string you hand in during construction. + * This short cut was added for Javascript. + * + * @update gess 5/12/98 + * @param aMode represents the parser mode (nav, other) + * @return + */ +nsScanner::nsScanner(const nsAString& anHTMLString) +{ + MOZ_COUNT_CTOR(nsScanner); + + mSlidingBuffer = nullptr; + mCountRemaining = 0; + mFirstNonWhitespacePosition = -1; + if (AppendToBuffer(anHTMLString)) { + mSlidingBuffer->BeginReading(mCurrentPosition); + } else { + /* XXX see hack below, re: bug 182067 */ + memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); + mEndPosition = mCurrentPosition; + } + mMarkPosition = mCurrentPosition; + mIncremental = false; + mUnicodeDecoder = 0; + mCharsetSource = kCharsetUninitialized; + mHasInvalidCharacter = false; + mReplacementCharacter = char16_t(0x0); +} + +/** + * Use this constructor if you want i/o to be based on strings + * the scanner receives. If you pass a null filename, you + * can still provide data to the scanner via append. + */ +nsScanner::nsScanner(nsString& aFilename, bool aCreateStream) + : mFilename(aFilename) +{ + MOZ_COUNT_CTOR(nsScanner); + NS_ASSERTION(!aCreateStream, "This is always true."); + + mSlidingBuffer = nullptr; + + // XXX This is a big hack. We need to initialize the iterators to something. + // What matters is that mCurrentPosition == mEndPosition, so that our methods + // believe that we are at EOF (see bug 182067). We null out mCurrentPosition + // so that we have some hope of catching null pointer dereferences associated + // with this hack. --darin + memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); + mMarkPosition = mCurrentPosition; + mEndPosition = mCurrentPosition; + + mIncremental = true; + mFirstNonWhitespacePosition = -1; + mCountRemaining = 0; + + mUnicodeDecoder = 0; + mCharsetSource = kCharsetUninitialized; + mHasInvalidCharacter = false; + mReplacementCharacter = char16_t(0x0); + // XML defaults to UTF-8 and about:blank is UTF-8, too. + SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault); +} + +nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource) +{ + if (aSource < mCharsetSource) // priority is lower than the current one + return NS_OK; + + mCharsetSource = aSource; + + nsCString charsetName; + mozilla::DebugOnly valid = + EncodingUtils::FindEncodingForLabel(aCharset, charsetName); + MOZ_ASSERT(valid, "Should never call with a bogus aCharset."); + + if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) { + return NS_OK; // no difference, don't change it + } + + // different, need to change it + + mCharset.Assign(charsetName); + + mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); + mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); + + return NS_OK; +} + + +/** + * default destructor + * + * @update gess 3/25/98 + * @param + * @return + */ +nsScanner::~nsScanner() { + + delete mSlidingBuffer; + + MOZ_COUNT_DTOR(nsScanner); +} + +/** + * Resets current offset position of input stream to marked position. + * This allows us to back up to this point if the need should arise, + * such as when tokenization gets interrupted. + * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! + * + * @update gess 5/12/98 + * @param + * @return + */ +void nsScanner::RewindToMark(void){ + if (mSlidingBuffer) { + mCountRemaining += (Distance(mMarkPosition, mCurrentPosition)); + mCurrentPosition = mMarkPosition; + } +} + + +/** + * Records current offset position in input stream. This allows us + * to back up to this point if the need should arise, such as when + * tokenization gets interrupted. + * + * @update gess 7/29/98 + * @param + * @return + */ +int32_t nsScanner::Mark() { + int32_t distance = 0; + if (mSlidingBuffer) { + nsScannerIterator oldStart; + mSlidingBuffer->BeginReading(oldStart); + + distance = Distance(oldStart, mCurrentPosition); + + mSlidingBuffer->DiscardPrefix(mCurrentPosition); + mSlidingBuffer->BeginReading(mCurrentPosition); + mMarkPosition = mCurrentPosition; + } + + return distance; +} + +/** + * Insert data to our underlying input buffer as + * if it were read from an input stream. + * + * @update harishd 01/12/99 + * @return error code + */ +bool nsScanner::UngetReadable(const nsAString& aBuffer) { + if (!mSlidingBuffer) { + return false; + } + + mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition); + mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators + mSlidingBuffer->EndReading(mEndPosition); + + uint32_t length = aBuffer.Length(); + mCountRemaining += length; // Ref. bug 117441 + return true; +} + +/** + * Append data to our underlying input buffer as + * if it were read from an input stream. + * + * @update gess4/3/98 + * @return error code + */ +nsresult nsScanner::Append(const nsAString& aBuffer) { + if (!AppendToBuffer(aBuffer)) + return NS_ERROR_OUT_OF_MEMORY; + return NS_OK; +} + +/** + * + * + * @update gess 5/21/98 + * @param + * @return + */ +nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen, + nsIRequest *aRequest) +{ + nsresult res = NS_OK; + if (mUnicodeDecoder) { + int32_t unicharBufLen = 0; + mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen); + nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1); + NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY); + char16_t *unichars = buffer->DataStart(); + + int32_t totalChars = 0; + int32_t unicharLength = unicharBufLen; + int32_t errorPos = -1; + + do { + int32_t srcLength = aLen; + res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength); + + totalChars += unicharLength; + // Continuation of failure case + if(NS_FAILED(res)) { + // if we failed, we consume one byte, replace it with the replacement + // character and try the conversion again. + + // This is only needed because some decoders don't follow the + // nsIUnicodeDecoder contract: they return a failure when *aDestLength + // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177 + if ((unichars + unicharLength) >= buffer->DataEnd()) { + NS_ERROR("Unexpected end of destination buffer"); + break; + } + + if (mReplacementCharacter == 0x0 && errorPos == -1) { + errorPos = totalChars; + } + unichars[unicharLength++] = mReplacementCharacter == 0x0 ? + mUnicodeDecoder->GetCharacterForUnMapped() : + mReplacementCharacter; + + unichars = unichars + unicharLength; + unicharLength = unicharBufLen - (++totalChars); + + mUnicodeDecoder->Reset(); + + if(((uint32_t) (srcLength + 1)) > aLen) { + srcLength = aLen; + } + else { + ++srcLength; + } + + aBuffer += srcLength; + aLen -= srcLength; + } + } while (NS_FAILED(res) && (aLen > 0)); + + buffer->SetDataLength(totalChars); + // Don't propagate return code of unicode decoder + // since it doesn't reflect on our success or failure + // - Ref. bug 87110 + res = NS_OK; + if (!AppendToBuffer(buffer, aRequest, errorPos)) + res = NS_ERROR_OUT_OF_MEMORY; + } + else { + NS_WARNING("No decoder found."); + res = NS_ERROR_FAILURE; + } + + return res; +} + +/** + * retrieve next char from scanners internal input stream + * + * @update gess 3/25/98 + * @param + * @return error code reflecting read status + */ +nsresult nsScanner::GetChar(char16_t& aChar) { + if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { + aChar = 0; + return kEOF; + } + + aChar = *mCurrentPosition++; + --mCountRemaining; + + return NS_OK; +} + + +/** + * peek ahead to consume next char from scanner's internal + * input buffer + * + * @update gess 3/25/98 + * @param + * @return + */ +nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) { + aChar = 0; + + if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { + return kEOF; + } + + if (aOffset > 0) { + if (mCountRemaining <= aOffset) + return kEOF; + + nsScannerIterator pos = mCurrentPosition; + pos.advance(aOffset); + aChar=*pos; + } + else { + aChar=*mCurrentPosition; + } + + return NS_OK; +} + +nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset) +{ + if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { + return kEOF; + } + + nsScannerIterator start, end; + + start = mCurrentPosition; + + if ((int32_t)mCountRemaining <= aOffset) { + return kEOF; + } + + if (aOffset > 0) { + start.advance(aOffset); + } + + if (mCountRemaining < uint32_t(aNumChars + aOffset)) { + end = mEndPosition; + } + else { + end = start; + end.advance(aNumChars); + } + + CopyUnicodeTo(start, end, aStr); + + return NS_OK; +} + + +/** + * Skip whitespace on scanner input stream + * + * @update gess 3/25/98 + * @param + * @return error status + */ +nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped) { + + if (!mSlidingBuffer) { + return kEOF; + } + + char16_t theChar = 0; + nsresult result = Peek(theChar); + + if (NS_FAILED(result)) { + return result; + } + + nsScannerIterator current = mCurrentPosition; + bool done = false; + bool skipped = false; + + while (!done && current != mEndPosition) { + switch(theChar) { + case '\n': + case '\r': ++aNewlinesSkipped; + case ' ' : + case '\t': + { + skipped = true; + char16_t thePrevChar = theChar; + theChar = (++current != mEndPosition) ? *current : '\0'; + if ((thePrevChar == '\r' && theChar == '\n') || + (thePrevChar == '\n' && theChar == '\r')) { + theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF + } + } + break; + default: + done = true; + break; + } + } + + if (skipped) { + SetPosition(current); + if (current == mEndPosition) { + result = kEOF; + } + } + + return result; +} + +/** + * Skip over chars as long as they equal given char + * + * @update gess 3/25/98 + * @param + * @return error code + */ +nsresult nsScanner::SkipOver(char16_t aSkipChar){ + + if (!mSlidingBuffer) { + return kEOF; + } + + char16_t ch=0; + nsresult result=NS_OK; + + while(NS_OK==result) { + result=Peek(ch); + if(NS_OK == result) { + if(ch!=aSkipChar) { + break; + } + GetChar(ch); + } + else break; + } //while + return result; + +} + +#if 0 +void DoErrTest(nsString& aString) { + int32_t pos=aString.FindChar(0); + if(kNotFound', or a '/'. + * + * @param aString - receives new data from stream + * @return error code + */ +nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) { + + if (!mSlidingBuffer) { + return kEOF; + } + + char16_t theChar=0; + nsresult result=Peek(theChar); + nsScannerIterator current, end; + bool found=false; + + current = mCurrentPosition; + end = mEndPosition; + + // Loop until we find an illegal character. Everything is then appended + // later. + while(current != end && !found) { + theChar=*current; + + switch(theChar) { + case '\n': + case '\r': + case ' ' : + case '\t': + case '\v': + case '\f': + case '<': + case '>': + case '/': + found = true; + break; + + case '\0': + ReplaceCharacter(current, sInvalid); + break; + + default: + break; + } + + if (!found) { + ++current; + } + } + + // Don't bother appending nothing. + if (current != mCurrentPosition) { + AppendUnicodeTo(mCurrentPosition, current, aString); + } + + SetPosition(current); + if (current == end) { + result = kEOF; + } + + //DoErrTest(aString); + + return result; +} + +/** + * Consume characters until you run into a char that's not valid in an + * entity name + * + * @param aString - receives new data from stream + * @return error code + */ +nsresult nsScanner::ReadEntityIdentifier(nsString& aString) { + + if (!mSlidingBuffer) { + return kEOF; + } + + char16_t theChar=0; + nsresult result=Peek(theChar); + nsScannerIterator origin, current, end; + bool found=false; + + origin = mCurrentPosition; + current = mCurrentPosition; + end = mEndPosition; + + while(current != end) { + + theChar=*current; + if(theChar) { + found=false; + switch(theChar) { + case '_': + case '-': + case '.': + // Don't allow ':' in entity names. See bug 23791 + found = true; + break; + default: + found = ('a'<=theChar && theChar<='z') || + ('A'<=theChar && theChar<='Z') || + ('0'<=theChar && theChar<='9'); + break; + } + + if(!found) { + AppendUnicodeTo(mCurrentPosition, current, aString); + break; + } + } + ++current; + } + + SetPosition(current); + if (current == end) { + AppendUnicodeTo(origin, current, aString); + return kEOF; + } + + //DoErrTest(aString); + + return result; +} + +/** + * Consume digits + * + * @param aString - should contain digits + * @return error code + */ +nsresult nsScanner::ReadNumber(nsString& aString,int32_t aBase) { + + if (!mSlidingBuffer) { + return kEOF; + } + + NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); + + char16_t theChar=0; + nsresult result=Peek(theChar); + nsScannerIterator origin, current, end; + + origin = mCurrentPosition; + current = origin; + end = mEndPosition; + + bool done = false; + while(current != end) { + theChar=*current; + if(theChar) { + done = (theChar < '0' || theChar > '9') && + ((aBase == 16)? (theChar < 'A' || theChar > 'F') && + (theChar < 'a' || theChar > 'f') + :true); + if(done) { + AppendUnicodeTo(origin, current, aString); + break; + } + } + ++current; + } + + SetPosition(current); + if (current == end) { + AppendUnicodeTo(origin, current, aString); + return kEOF; + } + + //DoErrTest(aString); + + return result; +} + +/** + * Consume characters until you find the terminal char + * + * @update gess 3/25/98 + * @param aString receives new data from stream + * @param addTerminal tells us whether to append terminal to aString + * @return error code + */ +nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString, + int32_t& aNewlinesSkipped, + bool& aHaveCR) { + + aHaveCR = false; + + if (!mSlidingBuffer) { + return kEOF; + } + + char16_t theChar = 0; + nsresult result = Peek(theChar); + + if (NS_FAILED(result)) { + return result; + } + + nsScannerIterator origin, current, end; + bool done = false; + + origin = mCurrentPosition; + current = origin; + end = mEndPosition; + + bool haveCR = false; + + while(!done && current != end) { + switch(theChar) { + case '\n': + case '\r': + { + ++aNewlinesSkipped; + char16_t thePrevChar = theChar; + theChar = (++current != end) ? *current : '\0'; + if ((thePrevChar == '\r' && theChar == '\n') || + (thePrevChar == '\n' && theChar == '\r')) { + theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF + haveCR = true; + } else if (thePrevChar == '\r') { + // Lone CR becomes CRLF; callers should know to remove extra CRs + AppendUnicodeTo(origin, current, aString); + aString.writable().Append(char16_t('\n')); + origin = current; + haveCR = true; + } + } + break; + case ' ' : + case '\t': + theChar = (++current != end) ? *current : '\0'; + break; + default: + done = true; + AppendUnicodeTo(origin, current, aString); + break; + } + } + + SetPosition(current); + if (current == end) { + AppendUnicodeTo(origin, current, aString); + result = kEOF; + } + + aHaveCR = haveCR; + return result; +} + +//XXXbz callers of this have to manage their lone '\r' themselves if they want +//it to work. Good thing they're all in view-source and it deals. +nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, + nsScannerIterator& aEnd, + int32_t& aNewlinesSkipped) { + + if (!mSlidingBuffer) { + return kEOF; + } + + char16_t theChar = 0; + nsresult result = Peek(theChar); + + if (NS_FAILED(result)) { + return result; + } + + nsScannerIterator origin, current, end; + bool done = false; + + origin = mCurrentPosition; + current = origin; + end = mEndPosition; + + while(!done && current != end) { + switch(theChar) { + case '\n': + case '\r': ++aNewlinesSkipped; + case ' ' : + case '\t': + { + char16_t thePrevChar = theChar; + theChar = (++current != end) ? *current : '\0'; + if ((thePrevChar == '\r' && theChar == '\n') || + (thePrevChar == '\n' && theChar == '\r')) { + theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF + } + } + break; + default: + done = true; + aStart = origin; + aEnd = current; + break; + } + } + + SetPosition(current); + if (current == end) { + aStart = origin; + aEnd = current; + result = kEOF; + } + + return result; +} + +/** + * Consume characters until you encounter one contained in given + * input set. + * + * @update gess 3/25/98 + * @param aString will contain the result of this method + * @param aTerminalSet is an ordered string that contains + * the set of INVALID characters + * @return error code + */ +nsresult nsScanner::ReadUntil(nsAString& aString, + const nsReadEndCondition& aEndCondition, + bool addTerminal) +{ + if (!mSlidingBuffer) { + return kEOF; + } + + nsScannerIterator origin, current; + const char16_t* setstart = aEndCondition.mChars; + const char16_t* setcurrent; + + origin = mCurrentPosition; + current = origin; + + char16_t theChar=0; + nsresult result=Peek(theChar); + + if (NS_FAILED(result)) { + return result; + } + + while (current != mEndPosition) { + theChar = *current; + if (theChar == '\0') { + ReplaceCharacter(current, sInvalid); + theChar = sInvalid; + } + + // Filter out completely wrong characters + // Check if all bits are in the required area + if(!(theChar & aEndCondition.mFilter)) { + // They were. Do a thorough check. + + setcurrent = setstart; + while (*setcurrent) { + if (*setcurrent == theChar) { + if(addTerminal) + ++current; + AppendUnicodeTo(origin, current, aString); + SetPosition(current); + + //DoErrTest(aString); + + return NS_OK; + } + ++setcurrent; + } + } + + ++current; + } + + // If we are here, we didn't find any terminator in the string and + // current = mEndPosition + SetPosition(current); + AppendUnicodeTo(origin, current, aString); + return kEOF; +} + +nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString, + const nsReadEndCondition& aEndCondition, + bool addTerminal) +{ + if (!mSlidingBuffer) { + return kEOF; + } + + nsScannerIterator origin, current; + const char16_t* setstart = aEndCondition.mChars; + const char16_t* setcurrent; + + origin = mCurrentPosition; + current = origin; + + char16_t theChar=0; + nsresult result=Peek(theChar); + + if (NS_FAILED(result)) { + return result; + } + + while (current != mEndPosition) { + theChar = *current; + if (theChar == '\0') { + ReplaceCharacter(current, sInvalid); + theChar = sInvalid; + } + + // Filter out completely wrong characters + // Check if all bits are in the required area + if(!(theChar & aEndCondition.mFilter)) { + // They were. Do a thorough check. + + setcurrent = setstart; + while (*setcurrent) { + if (*setcurrent == theChar) { + if(addTerminal) + ++current; + AppendUnicodeTo(origin, current, aString); + SetPosition(current); + + //DoErrTest(aString); + + return NS_OK; + } + ++setcurrent; + } + } + + ++current; + } + + // If we are here, we didn't find any terminator in the string and + // current = mEndPosition + SetPosition(current); + AppendUnicodeTo(origin, current, aString); + return kEOF; +} + +nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, + nsScannerIterator& aEnd, + const nsReadEndCondition &aEndCondition, + bool addTerminal) +{ + if (!mSlidingBuffer) { + return kEOF; + } + + nsScannerIterator origin, current; + const char16_t* setstart = aEndCondition.mChars; + const char16_t* setcurrent; + + origin = mCurrentPosition; + current = origin; + + char16_t theChar=0; + nsresult result=Peek(theChar); + + if (NS_FAILED(result)) { + aStart = aEnd = current; + return result; + } + + while (current != mEndPosition) { + theChar = *current; + if (theChar == '\0') { + ReplaceCharacter(current, sInvalid); + theChar = sInvalid; + } + + // Filter out completely wrong characters + // Check if all bits are in the required area + if(!(theChar & aEndCondition.mFilter)) { + // They were. Do a thorough check. + setcurrent = setstart; + while (*setcurrent) { + if (*setcurrent == theChar) { + if(addTerminal) + ++current; + aStart = origin; + aEnd = current; + SetPosition(current); + + return NS_OK; + } + ++setcurrent; + } + } + + ++current; + } + + // If we are here, we didn't find any terminator in the string and + // current = mEndPosition + SetPosition(current); + aStart = origin; + aEnd = current; + return kEOF; +} + +/** + * Consumes chars until you see the given terminalChar + * + * @update gess 3/25/98 + * @param + * @return error code + */ +nsresult nsScanner::ReadUntil(nsAString& aString, + char16_t aTerminalChar, + bool addTerminal) +{ + if (!mSlidingBuffer) { + return kEOF; + } + + nsScannerIterator origin, current; + + origin = mCurrentPosition; + current = origin; + + char16_t theChar; + nsresult result = Peek(theChar); + + if (NS_FAILED(result)) { + return result; + } + + while (current != mEndPosition) { + theChar = *current; + if (theChar == '\0') { + ReplaceCharacter(current, sInvalid); + theChar = sInvalid; + } + + if (aTerminalChar == theChar) { + if(addTerminal) + ++current; + AppendUnicodeTo(origin, current, aString); + SetPosition(current); + return NS_OK; + } + ++current; + } + + // If we are here, we didn't find any terminator in the string and + // current = mEndPosition + AppendUnicodeTo(origin, current, aString); + SetPosition(current); + return kEOF; + +} + +void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd) +{ + aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd); +} + +void nsScanner::CurrentPosition(nsScannerIterator& aPosition) +{ + aPosition = mCurrentPosition; +} + +void nsScanner::EndReading(nsScannerIterator& aPosition) +{ + aPosition = mEndPosition; +} + +void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse) +{ + if (mSlidingBuffer) { +#ifdef DEBUG + uint32_t origRemaining = mCountRemaining; +#endif + + if (aReverse) { + mCountRemaining += (Distance(aPosition, mCurrentPosition)); + } + else { + mCountRemaining -= (Distance(mCurrentPosition, aPosition)); + } + + NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) || + (mCountRemaining <= origRemaining && !aReverse), + "Improper use of nsScanner::SetPosition. Make sure to set the" + " aReverse parameter correctly"); + + mCurrentPosition = aPosition; + if (aTerminate && (mCurrentPosition == mEndPosition)) { + mMarkPosition = mCurrentPosition; + mSlidingBuffer->DiscardPrefix(mCurrentPosition); + } + } +} + +void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition, + char16_t aChar) +{ + if (mSlidingBuffer) { + mSlidingBuffer->ReplaceCharacter(aPosition, aChar); + } +} + +bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf, + nsIRequest *aRequest, + int32_t aErrorPos) +{ + uint32_t countRemaining = mCountRemaining; + if (!mSlidingBuffer) { + mSlidingBuffer = new nsScannerString(aBuf); + if (!mSlidingBuffer) + return false; + mSlidingBuffer->BeginReading(mCurrentPosition); + mMarkPosition = mCurrentPosition; + mSlidingBuffer->EndReading(mEndPosition); + mCountRemaining = aBuf->DataLength(); + } + else { + mSlidingBuffer->AppendBuffer(aBuf); + if (mCurrentPosition == mEndPosition) { + mSlidingBuffer->BeginReading(mCurrentPosition); + } + mSlidingBuffer->EndReading(mEndPosition); + mCountRemaining += aBuf->DataLength(); + } + + if (aErrorPos != -1 && !mHasInvalidCharacter) { + mHasInvalidCharacter = true; + mFirstInvalidPosition = mCurrentPosition; + mFirstInvalidPosition.advance(countRemaining + aErrorPos); + } + + if (mFirstNonWhitespacePosition == -1) { + nsScannerIterator iter(mCurrentPosition); + nsScannerIterator end(mEndPosition); + + while (iter != end) { + if (!nsCRT::IsAsciiSpace(*iter)) { + mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter); + + break; + } + + ++iter; + } + } + return true; +} + +/** + * call this to copy bytes out of the scanner that have not yet been consumed + * by the tokenization process. + * + * @update gess 5/12/98 + * @param aCopyBuffer is where the scanner buffer will be copied to + * @return nada + */ +void nsScanner::CopyUnusedData(nsString& aCopyBuffer) { + if (!mSlidingBuffer) { + aCopyBuffer.Truncate(); + return; + } + + nsScannerIterator start, end; + start = mCurrentPosition; + end = mEndPosition; + + CopyUnicodeTo(start, end, aCopyBuffer); +} + +/** + * Retrieve the name of the file that the scanner is reading from. + * In some cases, it's just a given name, because the scanner isn't + * really reading from a file. + * + * @update gess 5/12/98 + * @return + */ +nsString& nsScanner::GetFilename(void) { + return mFilename; +} + +/** + * Conduct self test. Actually, selftesting for this class + * occurs in the parser selftest. + * + * @update gess 3/25/98 + * @param + * @return + */ + +void nsScanner::SelfTest(void) { +#ifdef _DEBUG +#endif +} + +void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter) +{ + mReplacementCharacter = aReplacementCharacter; + + if (mHasInvalidCharacter) { + ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter); + } +} +