diff -r 000000000000 -r 6474c204b198 parser/htmlparser/src/nsScanner.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parser/htmlparser/src/nsScanner.h Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,309 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +/** + * MODULE NOTES: + * @update gess 4/1/98 + * + * The scanner is a low-level service class that knows + * how to consume characters out of an (internal) stream. + * This class also offers a series of utility methods + * that most tokenizers want, such as readUntil() + * and SkipWhitespace(). + */ + + +#ifndef SCANNER +#define SCANNER + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsIParser.h" +#include "nsIUnicodeDecoder.h" +#include "nsScannerString.h" + +class nsParser; + +class nsReadEndCondition { +public: + const char16_t *mChars; + char16_t mFilter; + explicit nsReadEndCondition(const char16_t* aTerminateChars); +private: + nsReadEndCondition(const nsReadEndCondition& aOther); // No copying + void operator=(const nsReadEndCondition& aOther); // No assigning +}; + +class nsScanner { + public: + + /** + * Use this constructor for the XML fragment parsing case + */ + nsScanner(const nsAString& anHTMLString); + + /** + * Use this constructor if you want i/o to be based on + * a file (therefore a stream) or just data you provide via Append(). + */ + nsScanner(nsString& aFilename, bool aCreateStream); + + ~nsScanner(); + + /** + * retrieve next char from internal input stream + * + * @update gess 3/25/98 + * @param ch is the char to accept new value + * @return error code reflecting read status + */ + nsresult GetChar(char16_t& ch); + + /** + * peek ahead to consume next char from scanner's internal + * input buffer + * + * @update gess 3/25/98 + * @param ch is the char to accept new value + * @return error code reflecting read status + */ + nsresult Peek(char16_t& ch, uint32_t aOffset=0); + + nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0); + + /** + * Skip over chars as long as they equal given char + * + * @update gess 3/25/98 + * @param char to be skipped + * @return error code + */ + nsresult SkipOver(char16_t aSkipChar); + + /** + * Skip whitespace on scanner input stream + * + * @update gess 3/25/98 + * @return error status + */ + nsresult SkipWhitespace(int32_t& aNewlinesSkipped); + + /** + * Consume characters until you run into space, a '<', a '>', or a '/'. + * + * @param aString - receives new data from stream + * @return error code + */ + nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString); + + /** + * Consume characters until you run into a char that's not valid in an + * entity name + * + * @param aString - receives new data from stream + * @return error code + */ + nsresult ReadEntityIdentifier(nsString& aString); + nsresult ReadNumber(nsString& aString,int32_t aBase); + nsresult ReadWhitespace(nsScannerSharedSubstring& aString, + int32_t& aNewlinesSkipped, + bool& aHaveCR); + nsresult ReadWhitespace(nsScannerIterator& aStart, + nsScannerIterator& aEnd, + int32_t& aNewlinesSkipped); + + /** + * Consume characters until you find the terminal char + * + * @update gess 3/25/98 + * @param aString receives new data from stream + * @param aTerminal contains terminating char + * @param addTerminal tells us whether to append terminal to aString + * @return error code + */ + nsresult ReadUntil(nsAString& aString, + char16_t aTerminal, + bool addTerminal); + + /** + * Consume characters until you find one contained in given + * terminal set. + * + * @update gess 3/25/98 + * @param aString receives new data from stream + * @param aTermSet contains set of terminating chars + * @param addTerminal tells us whether to append terminal to aString + * @return error code + */ + nsresult ReadUntil(nsAString& aString, + const nsReadEndCondition& aEndCondition, + bool addTerminal); + + nsresult ReadUntil(nsScannerSharedSubstring& aString, + const nsReadEndCondition& aEndCondition, + bool addTerminal); + + nsresult ReadUntil(nsScannerIterator& aStart, + nsScannerIterator& aEnd, + const nsReadEndCondition& aEndCondition, + bool addTerminal); + + /** + * Records current offset position in input stream. This allows us + * to back up to this point if the need should arise, such as when + * tokenization gets interrupted. + * + * @update gess 5/12/98 + * @param + * @return + */ + int32_t Mark(void); + + /** + * Resets current offset position of input stream to marked position. + * This allows us to back up to this point if the need should arise, + * such as when tokenization gets interrupted. + * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! + * + * @update gess 5/12/98 + * @param + * @return + */ + void RewindToMark(void); + + + /** + * + * + * @update harishd 01/12/99 + * @param + * @return + */ + bool UngetReadable(const nsAString& aBuffer); + + /** + * + * + * @update gess 5/13/98 + * @param + * @return + */ + nsresult Append(const nsAString& aBuffer); + + /** + * + * + * @update gess 5/21/98 + * @param + * @return + */ + nsresult Append(const char* aBuffer, uint32_t aLen, + nsIRequest *aRequest); + + /** + * Call this to copy bytes out of the scanner that have not yet been consumed + * by the tokenization process. + * + * @update gess 5/12/98 + * @param aCopyBuffer is where the scanner buffer will be copied to + * @return nada + */ + void CopyUnusedData(nsString& aCopyBuffer); + + /** + * Retrieve the name of the file that the scanner is reading from. + * In some cases, it's just a given name, because the scanner isn't + * really reading from a file. + * + * @update gess 5/12/98 + * @return + */ + nsString& GetFilename(void); + + static void SelfTest(); + + /** + * Use this setter to change the scanner's unicode decoder + * + * @update ftang 3/02/99 + * @param aCharset a normalized (alias resolved) charset name + * @param aCharsetSource- where the charset info came from + * @return + */ + nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource); + + void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd); + void CurrentPosition(nsScannerIterator& aPosition); + void EndReading(nsScannerIterator& aPosition); + void SetPosition(nsScannerIterator& aPosition, + bool aTruncate = false, + bool aReverse = false); + void ReplaceCharacter(nsScannerIterator& aPosition, + char16_t aChar); + + /** + * Internal method used to cause the internal buffer to + * be filled with data. + * + * @update gess4/3/98 + */ + bool IsIncremental(void) {return mIncremental;} + void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;} + + /** + * Return the position of the first non-whitespace + * character. This is only reliable before consumers start + * reading from this scanner. + */ + int32_t FirstNonWhitespacePosition() + { + return mFirstNonWhitespacePosition; + } + + /** + * Override replacement character used by nsIUnicodeDecoder. + * Default behavior is that it uses nsIUnicodeDecoder's mapping. + * + * @param aReplacementCharacter the replacement character + * XML (expat) parser uses 0xffff + */ + void OverrideReplacementCharacter(char16_t aReplacementCharacter); + + protected: + + bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1); + bool AppendToBuffer(const nsAString& aStr) + { + nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr); + if (!buf) + return false; + AppendToBuffer(buf, nullptr); + return true; + } + + nsScannerString* mSlidingBuffer; + nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer + nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here) + nsScannerIterator mEndPosition; // The current end of the scanner buffer + nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected + nsString mFilename; + uint32_t mCountRemaining; // The number of bytes still to be read + // from the scanner buffer + bool mIncremental; + bool mHasInvalidCharacter; + char16_t mReplacementCharacter; + int32_t mFirstNonWhitespacePosition; + int32_t mCharsetSource; + nsCString mCharset; + nsCOMPtr mUnicodeDecoder; + + private: + nsScanner &operator =(const nsScanner &); // Not implemented. +}; + +#endif + +