michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0:
michael@0: /**
michael@0: * MODULE NOTES:
michael@0: *
michael@0: * This class does two primary jobs:
michael@0: * 1) It iterates the tokens provided during the
michael@0: * tokenization process, identifying where elements
michael@0: * begin and end (doing validation and normalization).
michael@0: * 2) It controls and coordinates with an instance of
michael@0: * the IContentSink interface, to coordinate the
michael@0: * production of the content model.
michael@0: *
michael@0: * The basic operation of this class assumes that an HTML
michael@0: * document is non-normalized. Therefore, we don't process
michael@0: * the document in a normalized way. Don't bother to look
michael@0: * for methods like: doHead() or doBody().
michael@0: *
michael@0: * Instead, in order to be backward compatible, we must
michael@0: * scan the set of tokens and perform this basic set of
michael@0: * operations:
michael@0: * 1) Determine the token type (easy, since the tokens know)
michael@0: * 2) Determine the appropriate section of the HTML document
michael@0: * each token belongs in (HTML,HEAD,BODY,FRAMESET).
michael@0: * 3) Insert content into our document (via the sink) into
michael@0: * the correct section.
michael@0: * 4) In the case of tags that belong in the BODY, we must
michael@0: * ensure that our underlying document state reflects
michael@0: * the appropriate context for our tag.
michael@0: *
michael@0: * For example, if we see a <TR>, we must ensure our
michael@0: * document contains a table into which the row can
michael@0: * be placed. This may result in "implicit containers"
michael@0: * created to ensure a well-formed document.
michael@0: *
michael@0: */
michael@0:
michael@0: #ifndef NS_PARSER__
michael@0: #define NS_PARSER__
michael@0:
michael@0: #include "nsIParser.h"
michael@0: #include "nsDeque.h"
michael@0: #include "nsIURL.h"
michael@0: #include "CParserContext.h"
michael@0: #include "nsParserCIID.h"
michael@0: #include "nsITokenizer.h"
michael@0: #include "nsHTMLTags.h"
michael@0: #include "nsIContentSink.h"
michael@0: #include "nsCOMArray.h"
michael@0: #include "nsCycleCollectionParticipant.h"
michael@0: #include "nsWeakReference.h"
michael@0:
michael@0: class nsIDTD;
michael@0: class nsScanner;
michael@0: class nsIRunnable;
michael@0:
michael@0: #ifdef _MSC_VER
michael@0: #pragma warning( disable : 4275 )
michael@0: #endif
michael@0:
michael@0:
michael@0: /**
michael@0:  * Parser implementation class. It derives from nsIParser and
michael@0:  * nsIStreamListener, and from nsSupportsWeakReference.
michael@0:  *
michael@0:  * See the MODULE NOTES at the top of this file for the overall design.
michael@0:  */
michael@0: class nsParser : public nsIParser,
michael@0:                  public nsIStreamListener,
michael@0:                  public nsSupportsWeakReference
michael@0: {
michael@0:   public:
michael@0:     /**
michael@0:      * Called on module init
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     static nsresult Init();
michael@0:
michael@0:     /**
michael@0:      * Called on module shutdown
michael@0:      */
michael@0:     static void Shutdown();
michael@0:
michael@0:     NS_DECL_CYCLE_COLLECTING_ISUPPORTS
michael@0:     NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
michael@0:
michael@0:     /**
michael@0:      * Default constructor
michael@0:      * @update gess5/11/98
michael@0:      */
michael@0:     nsParser();
michael@0:
michael@0:     /**
michael@0:      * Destructor
michael@0:      * @update gess5/11/98
michael@0:      */
michael@0:     virtual ~nsParser();
michael@0:
michael@0:     /**
michael@0:      * Select given content sink into parser for parser output.
michael@0:      * This method returns nothing; use GetContentSink() to read the sink.
michael@0:      * @update gess5/11/98
michael@0:      * @param aSink is the new sink to be used by parser
michael@0:      */
michael@0:     NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
michael@0:
michael@0:     /**
michael@0:      * Retrieve the sink set into the parser.
michael@0:      * @update gess5/11/98
michael@0:      * @return the content sink pointer
michael@0:      *         NOTE(review): presumably nullptr when no sink has been
michael@0:      *         set -- confirm in the implementation.
michael@0:      */
michael@0:     NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
michael@0:
michael@0:     /**
michael@0:      * Call this method once you've created a parser, and want to instruct it
michael@0:      * about the command which caused the parser to be constructed. For example,
michael@0:      * this allows us to select a DTD which can do, say, view-source.
michael@0:      *
michael@0:      * The command can be given either as a string or as an eParserCommands
michael@0:      * value.
michael@0:      *
michael@0:      * @update gess 3/25/98
michael@0:      * @param aCommand -- for SetCommand, ptr to string that contains command;
michael@0:      *        for GetCommand, a string reference
michael@0:      *        (NOTE(review): presumably filled with the current command --
michael@0:      *        confirm in the implementation)
michael@0:      * @param aParserCommand -- the command as an enum value
michael@0:      */
michael@0:     NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
michael@0:     NS_IMETHOD_(void) SetCommand(const char* aCommand);
michael@0:     NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
michael@0:
michael@0:     /**
michael@0:      * Call this method once you've created a parser, and want to instruct it
michael@0:      * about what charset to load
michael@0:      *
michael@0:      * @update ftang 4/23/99
michael@0:      * @param aCharset -- the charset of a document
michael@0:      * @param aSource -- the source of the charset
michael@0:      */
michael@0:     NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
michael@0:
michael@0:     /**
michael@0:      * Copy the stored charset and charset source into the out params.
michael@0:      *
michael@0:      * @param aCharset -- receives mCharset
michael@0:      * @param aSource -- receives mCharsetSource
michael@0:      */
michael@0:     NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource)
michael@0:     {
michael@0:         aCharset = mCharset;
michael@0:         aSource = mCharsetSource;
michael@0:     }
michael@0:
michael@0:     /**
michael@0:      * Cause parser to parse input from given URL
michael@0:      * @update gess5/11/98
michael@0:      * @param aURL is a descriptor for source document
michael@0:      * @param aListener is a listener to forward notifications to
michael@0:      *        (optional, defaults to nullptr)
michael@0:      * @param aKey opaque key, defaults to 0
michael@0:      *        NOTE(review): its meaning is not visible in this header.
michael@0:      * @param aMode DTD mode, defaults to eDTDMode_autodetect
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     NS_IMETHOD Parse(nsIURI* aURL,
michael@0:                      nsIRequestObserver* aListener = nullptr,
michael@0:                      void* aKey = 0,
michael@0:                      nsDTDMode aMode = eDTDMode_autodetect);
michael@0:
michael@0:     /**
michael@0:      * Parse a fragment held in a string buffer.
michael@0:      *
michael@0:      * NOTE(review): this method still needs real documentation. The
michael@0:      * parameter descriptions below are inferred from the names only;
michael@0:      * confirm against the nsIParser interface.
michael@0:      *
michael@0:      * @param aSourceBuffer the source text of the fragment
michael@0:      * @param aTagStack presumably the context tags surrounding the fragment
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
michael@0:                              nsTArray<nsString>& aTagStack);
michael@0:
michael@0:     /**
michael@0:      * This method gets called when the tokens have been consumed, and it's time
michael@0:      * to build the model via the content sink.
michael@0:      * @update gess5/11/98
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     NS_IMETHOD BuildModel(void);
michael@0:
michael@0:     NS_IMETHOD ContinueInterruptedParsing();
michael@0:     NS_IMETHOD_(void) BlockParser();
michael@0:     NS_IMETHOD_(void) UnblockParser();
michael@0:     NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
michael@0:     NS_IMETHOD Terminate(void);
michael@0:
michael@0:     /**
michael@0:      * Call this to query whether the parser is enabled or not.
michael@0:      *
michael@0:      * @update vidur 4/12/99
michael@0:      * @return current state
michael@0:      */
michael@0:     NS_IMETHOD_(bool) IsParserEnabled();
michael@0:
michael@0:     /**
michael@0:      * Call this to query whether the parser thinks it's done with parsing.
michael@0:      *
michael@0:      * @update rickg 5/12/01
michael@0:      * @return complete state
michael@0:      */
michael@0:     NS_IMETHOD_(bool) IsComplete();
michael@0:
michael@0:     /**
michael@0:      * This rather arcane method (hack) is used as a signal between the
michael@0:      * DTD and the parser. It allows the DTD to tell the parser that content
michael@0:      * that comes through (parser::parser(string)) but not consumed should
michael@0:      * propagate into the next string based parse call.
michael@0:      *
michael@0:      * @update gess 9/1/98
michael@0:      * @param aBuffer the unused string content
michael@0:      *        NOTE(review): presumably stored in mUnusedInput -- confirm
michael@0:      *        in the implementation.
michael@0:      */
michael@0:     void SetUnusedInput(nsString& aBuffer);
michael@0:
michael@0:     /**
michael@0:      * This method gets called (automatically) during incremental parsing
michael@0:      * @update gess5/11/98
michael@0:      * @param allowIteration defaults to true
michael@0:      * @param aIsFinalChunk defaults to false
michael@0:      * @param aCanInterrupt defaults to true
michael@0:      *        NOTE(review): the exact effect of these three flags is not
michael@0:      *        visible in this header -- see the implementation.
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     virtual nsresult ResumeParse(bool allowIteration = true,
michael@0:                                  bool aIsFinalChunk = false,
michael@0:                                  bool aCanInterrupt = true);
michael@0:
michael@0:     //*********************************************
michael@0:     // These methods are callback methods used by
michael@0:     // net lib to let us know about our inputstream.
michael@0:     //*********************************************
michael@0:     // nsIRequestObserver methods:
michael@0:     NS_DECL_NSIREQUESTOBSERVER
michael@0:
michael@0:     // nsIStreamListener methods:
michael@0:     NS_DECL_NSISTREAMLISTENER
michael@0:
michael@0:     // Parser context accessors. PeekContext() returns mParserContext
michael@0:     // without modifying it.
michael@0:     // NOTE(review): ownership of the context passed to PushContext() and
michael@0:     // returned by PopContext() is not visible here -- check the callers.
michael@0:     void PushContext(CParserContext& aContext);
michael@0:     CParserContext* PopContext();
michael@0:     CParserContext* PeekContext() {return mParserContext;}
michael@0:
michael@0:     /**
michael@0:      * Get the channel associated with this parser
michael@0:      * @update harishd,gagan 07/17/01
michael@0:      * @param aChannel out param that will contain the result
michael@0:      * @return NS_OK if successful
michael@0:      */
michael@0:     NS_IMETHOD GetChannel(nsIChannel** aChannel);
michael@0:
michael@0:     /**
michael@0:      * Get the DTD associated with this parser
michael@0:      * @update vidur 9/29/99
michael@0:      * @param aDTD out param that will contain the result
michael@0:      * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
michael@0:      */
michael@0:     NS_IMETHOD GetDTD(nsIDTD** aDTD);
michael@0:
michael@0:     /**
michael@0:      * Get the nsIStreamListener for this parser
michael@0:      */
michael@0:     virtual nsIStreamListener* GetStreamListener();
michael@0:
michael@0:     /**
michael@0:      * NOTE(review): undocumented in the original. Presumably forwards the
michael@0:      * given charset to the content sink -- confirm in the implementation.
michael@0:      * @param aCharset the charset name
michael@0:      */
michael@0:     void SetSinkCharset(nsACString& aCharset);
michael@0:
michael@0:     /**
michael@0:      * Removes continue parsing events
michael@0:      * @update kmcclusk 5/18/98
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     NS_IMETHODIMP CancelParsingEvents();
michael@0:
michael@0:     /**
michael@0:      * Return true.
michael@0:      */
michael@0:     virtual bool IsInsertionPointDefined();
michael@0:
michael@0:     /**
michael@0:      * No-op.
michael@0:      */
michael@0:     virtual void BeginEvaluatingParserInsertedScript();
michael@0:
michael@0:     /**
michael@0:      * No-op.
michael@0:      */
michael@0:     virtual void EndEvaluatingParserInsertedScript();
michael@0:
michael@0:     /**
michael@0:      * No-op.
michael@0:      */
michael@0:     virtual void MarkAsNotScriptCreated(const char* aCommand);
michael@0:
michael@0:     /**
michael@0:      * Always false.
michael@0:      */
michael@0:     virtual bool IsScriptCreated();
michael@0:
michael@0:     /**
michael@0:      * Set the parser state to indicate whether parsing tokens can be interrupted
michael@0:      * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
michael@0:      * @update kmcclusk 5/18/98
michael@0:      */
michael@0:     void SetCanInterrupt(bool aCanInterrupt);
michael@0:
michael@0:     /**
michael@0:      * This is called when the final chunk has been
michael@0:      * passed to the parser and the content sink has
michael@0:      * interrupted token processing. It schedules
michael@0:      * a ParserContinue PL_Event which will ask the parser
michael@0:      * to HandleParserContinueEvent when it is handled.
michael@0:      * @update kmcclusk6/1/2001
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     nsresult PostContinueEvent();
michael@0:
michael@0:     /**
michael@0:      * Fired when the continue parse event is triggered.
michael@0:      * @update kmcclusk 5/18/98
michael@0:      */
michael@0:     void HandleParserContinueEvent(class nsParserContinueEvent *);
michael@0:
michael@0:     /**
michael@0:      * Reset the parser by calling Cleanup() followed by Initialize().
michael@0:      */
michael@0:     virtual void Reset() {
michael@0:         Cleanup();
michael@0:         Initialize();
michael@0:     }
michael@0:
michael@0:     /**
michael@0:      * @return true when a sink is set and that sink reports that a script
michael@0:      *         is executing; false when there is no sink.
michael@0:      */
michael@0:     bool IsScriptExecuting() {
michael@0:         return mSink && mSink->IsScriptExecuting();
michael@0:     }
michael@0:
michael@0:     /**
michael@0:      * @return true only when no script is executing (see
michael@0:      *         IsScriptExecuting()) and mProcessingNetworkData is false.
michael@0:      */
michael@0:     bool IsOkToProcessNetworkData() {
michael@0:         return !IsScriptExecuting() && !mProcessingNetworkData;
michael@0:     }
michael@0:
michael@0:   protected:
michael@0:
michael@0:     // Set-up and tear-down helpers; Reset() calls Cleanup() and then
michael@0:     // Initialize() with the default argument.
michael@0:     // NOTE(review): aConstructor presumably marks the call made from the
michael@0:     // constructor -- confirm in the implementation.
michael@0:     void Initialize(bool aConstructor = false);
michael@0:     void Cleanup();
michael@0:
michael@0:     /**
michael@0:      * NOTE(review): presumably called before model building starts --
michael@0:      * confirm in the implementation.
michael@0:      * @update gess5/18/98
michael@0:      * @param aFilename name of the source being parsed
michael@0:      *        (NOTE(review): inferred from the parameter name)
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     nsresult WillBuildModel(nsString& aFilename);
michael@0:
michael@0:     /**
michael@0:      * NOTE(review): presumably called after model building finishes --
michael@0:      * confirm in the implementation.
michael@0:      * @update gess5/18/98
michael@0:      * @param anErrorCode status code passed in by the caller
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     nsresult DidBuildModel(nsresult anErrorCode);
michael@0:
michael@0:   private:
michael@0:
michael@0:     /*******************************************
michael@0:       These are the tokenization methods...
michael@0:      *******************************************/
michael@0:
michael@0:     /**
michael@0:      * Part of the code sandwich, this gets called right before
michael@0:      * the tokenization process begins. The main reason for
michael@0:      * this call is to allow the delegate to do initialization.
michael@0:      *
michael@0:      * @update gess 3/25/98
michael@0:      * @param aIsFinalChunk defaults to false
michael@0:      *        NOTE(review): presumably true for the last chunk of input.
michael@0:      * @return TRUE if it's ok to proceed
michael@0:      */
michael@0:     bool WillTokenize(bool aIsFinalChunk = false);
michael@0:
michael@0:
michael@0:     /**
michael@0:      * This is the primary control routine. It iteratively
michael@0:      * consumes tokens until an error occurs or you run out
michael@0:      * of data.
michael@0:      *
michael@0:      * @update gess 3/25/98
michael@0:      * @param aIsFinalChunk defaults to false
michael@0:      *        NOTE(review): presumably true for the last chunk of input.
michael@0:      * @return error code
michael@0:      */
michael@0:     nsresult Tokenize(bool aIsFinalChunk = false);
michael@0:
michael@0:     /**
michael@0:      * Pushes XML fragment parsing data to expat without an input stream.
michael@0:      *
michael@0:      * @param aSourceBuffer the source text to parse
michael@0:      * @param aKey opaque key
michael@0:      *        NOTE(review): its meaning is not visible in this header.
michael@0:      * @param aLastCall NOTE(review): presumably true on the final call
michael@0:      *        for this fragment -- confirm in the implementation.
michael@0:      * @return nsresult status code
michael@0:      */
michael@0:     nsresult Parse(const nsAString& aSourceBuffer,
michael@0:                    void* aKey,
michael@0:                    bool aLastCall);
michael@0:
michael@0:   protected:
michael@0:     //*********************************************
michael@0:     // And now, some data members...
michael@0:     //*********************************************
michael@0:
michael@0:
michael@0:     CParserContext*              mParserContext;  // returned by PeekContext()
michael@0:     nsCOMPtr<nsIDTD>             mDTD;
michael@0:     nsCOMPtr<nsIRequestObserver> mObserver;
michael@0:     nsCOMPtr<nsIContentSink>     mSink;           // queried by IsScriptExecuting()
michael@0:     nsIRunnable*                 mContinueEvent;  // weak ref
michael@0:
michael@0:     eParserCommands     mCommand;
michael@0:     nsresult            mInternalState;
michael@0:     nsresult            mStreamStatus;
michael@0:     int32_t             mCharsetSource;  // returned by GetDocumentCharset()
michael@0:
michael@0:     uint16_t            mFlags;
michael@0:
michael@0:     nsString            mUnusedInput;
michael@0:     nsCString           mCharset;        // returned by GetDocumentCharset()
michael@0:     nsCString           mCommandStr;
michael@0:
michael@0:     bool                mProcessingNetworkData;  // read by IsOkToProcessNetworkData()
michael@0:     bool                mIsAboutBlank;
michael@0: };
michael@0:
michael@0: #endif
michael@0: