The Tor Browser: parser/htmlparser/src/nsParser.h@6474c204b198

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     6 /**

     7  * MODULE NOTES:

     8  *

     9  *  This class does two primary jobs:

    10  *    1) It iterates the tokens provided during the

    11  *       tokenization process, identifying where elements

    12  *       begin and end (doing validation and normalization).

    13  *    2) It controls and coordinates with an instance of

    14  *       the IContentSink interface, to coordinate the

    15  *       production of the content model.

    16  *

    17  *  The basic operation of this class assumes that an HTML

    18  *  document is non-normalized. Therefore, we don't process

    19  *  the document in a normalized way. Don't bother to look

    20  *  for methods like: doHead() or doBody().

    21  *

    22  *  Instead, in order to be backward compatible, we must

    23  *  scan the set of tokens and perform this basic set of

    24  *  operations:

    25  *    1)  Determine the token type (easy, since the tokens know)

    26  *    2)  Determine the appropriate section of the HTML document

    27  *        each token belongs in (HTML,HEAD,BODY,FRAMESET).

    28  *    3)  Insert content into our document (via the sink) into

    29  *        the correct section.

    30  *    4)  In the case of tags that belong in the BODY, we must

    31  *        ensure that our underlying document state reflects

    32  *        the appropriate context for our tag.

    33  *

    34  *        For example, if we see a <TR>, we must ensure our

    35  *        document contains a table into which the row can

    36  *        be placed. This may result in "implicit containers"

    37  *        created to ensure a well-formed document.

    38  *

    39  */

    41 #ifndef NS_PARSER__

    42 #define NS_PARSER__

    44 #include "nsIParser.h"

    45 #include "nsDeque.h"

    46 #include "nsIURL.h"

    47 #include "CParserContext.h"

    48 #include "nsParserCIID.h"

    49 #include "nsITokenizer.h"

    50 #include "nsHTMLTags.h"

    51 #include "nsIContentSink.h"

    52 #include "nsCOMArray.h"

    53 #include "nsCycleCollectionParticipant.h"

    54 #include "nsWeakReference.h"

    56 class nsIDTD;

    57 class nsScanner;

    58 class nsIRunnable;

    60 #ifdef _MSC_VER

    61 #pragma warning( disable : 4275 )

    62 #endif

    /**
     * nsParser derives from nsIParser, nsIStreamListener and
     * nsSupportsWeakReference. Per the module notes in this header, it
     * iterates the tokens produced during tokenization and coordinates
     * with a content sink (mSink) to produce the content model.
     *
     * Only the inline bodies are visible in this header; everything else
     * is a declaration whose definition lives elsewhere.
     */
    65 class nsParser : public nsIParser,

    66                  public nsIStreamListener,

    67                  public nsSupportsWeakReference

    68 {

    69   public:

    70     /**

    71      * Called on module init.

    72      */

    73     static nsresult Init();

    75     /**

    76      * Called on module shutdown.

    77      */

    78     static void Shutdown();

    80     NS_DECL_CYCLE_COLLECTING_ISUPPORTS

    81     NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)

    83     /**

    84      * Default constructor.

    85      * @update	gess5/11/98

    86      */

    87     nsParser();

    89     /**

    90      * Destructor (virtual).

    91      * @update	gess5/11/98

    92      */

    93     virtual ~nsParser();

    95     /**

    96      * Select given content sink into parser for parser output.

    97      * @update	gess5/11/98

    98      * @param   aSink is the new sink to be used by parser

    99      * @return  nothing; the method is declared void

   100      */

   101     NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);

   103     /**

   104      * Retrieve the sink set into the parser.

   105      * @update	gess5/11/98

   106      * (takes no parameters)

   107      * @return  an nsIContentSink pointer; presumably the current sink,

            *          possibly null -- confirm against the definition

   108      */

   109     NS_IMETHOD_(nsIContentSink*) GetContentSink(void);

   111     /**

   112      *  Call this method once you've created a parser, and want to instruct it

   113      *  about the command which caused the parser to be constructed. For example,

   114      *  this allows us to select a DTD which can do, say, view-source.

   115      *

   116      *  @update  gess 3/25/98

   117      *  @param   aCommand -- the command, as a string (GetCommand out-param,

            *           SetCommand C string) or as an eParserCommands value

   118      *  @return	 nothing; all three overloads are declared void

   119      */

   120     NS_IMETHOD_(void) GetCommand(nsCString& aCommand);

   121     NS_IMETHOD_(void) SetCommand(const char* aCommand);

   122     NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);

   124     /**

   125      *  Call this method once you've created a parser, and want to instruct it

   126      *  about what charset to load

   127      *

   128      *  @update  ftang 4/23/99

   129      *  @param   aCharset- the charset of a document

   130      *  @param   aSource- the source of the charset

   131      *  @return	 nothing; the method is declared void

   132      */

   133     NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource);

           /**
            * Copies the stored charset (mCharset) and charset source
            * (mCharsetSource) into the two out-parameters.
            */
   135     NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource)

   136     {

   137          aCharset = mCharset;

   138          aSource = mCharsetSource;

   139     }

   141     /**

   142      * Cause parser to parse input from given URL

   143      * @update	gess5/11/98

   144      * @param   aURL is a descriptor for source document

   145      * @param   aListener is a listener to forward notifications to

            * @param   aKey defaults to 0; meaning not visible in this header

            * @param   aMode defaults to eDTDMode_autodetect

   146      * @return  NOTE(review): the original note said TRUE/FALSE, but the

            *          method is declared NS_IMETHOD, so the result is presumably

            *          an nsresult -- confirm against the macro definition

   147      */

   148     NS_IMETHOD Parse(nsIURI* aURL,

   149                      nsIRequestObserver* aListener = nullptr,

   150                      void* aKey = 0,

   151                      nsDTDMode aMode = eDTDMode_autodetect);

   153     /**

   154      * This method needs documentation.

            * Takes a source buffer and a tag stack; behavior is not visible

            * in this header.

   155      */

   156     NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,

   157                              nsTArray<nsString>& aTagStack);

   159     /**

   160      * This method gets called when the tokens have been consumed, and it's time

   161      * to build the model via the content sink.

   162      * @update	gess5/11/98

   163      * @return  NOTE(review): the original note said YES/NO, but the method

            *          is declared NS_IMETHOD, so presumably an nsresult -- confirm

   164      */

   165     NS_IMETHOD BuildModel(void);

   167     NS_IMETHOD        ContinueInterruptedParsing();

   168     NS_IMETHOD_(void) BlockParser();

   169     NS_IMETHOD_(void) UnblockParser();

   170     NS_IMETHOD_(void) ContinueInterruptedParsingAsync();

   171     NS_IMETHOD        Terminate(void);

   173     /**

   174      * Call this to query whether the parser is enabled or not.

   175      *

   176      *  @update  vidur 4/12/99

   177      *  @return  current state

   178      */

   179     NS_IMETHOD_(bool) IsParserEnabled();

   181     /**

   182      * Call this to query whether the parser thinks it's done with parsing.

   183      *

   184      *  @update  rickg 5/12/01

   185      *  @return  complete state

   186      */

   187     NS_IMETHOD_(bool) IsComplete();

   189     /**

   190      *  This rather arcane method (hack) is used as a signal between the

   191      *  DTD and the parser. It allows the DTD to tell the parser that content

   192      *  that comes through (parser::parser(string)) but not consumed should

   193      *  propagate into the next string based parse call.

   194      *

   195      *  @update  gess 9/1/98

   196      *  @param   aBuffer the unused string content (presumably stored in

            *           mUnusedInput -- confirm against the definition)

   197      *  @return  nothing; the method is declared void

   198      */

   199     void SetUnusedInput(nsString& aBuffer);

   201     /**

   202      * This method gets called (automatically) during incremental parsing

   203      * @update	gess5/11/98

            * @param   allowIteration defaults to true

            * @param   aIsFinalChunk defaults to false

            * @param   aCanInterrupt defaults to true

   204      * @return  an nsresult (the original note said TRUE/FALSE)

   205      */

   206     virtual nsresult ResumeParse(bool allowIteration = true,

   207                                  bool aIsFinalChunk = false,

   208                                  bool aCanInterrupt = true);

   210      //*********************************************

   211       // These methods are callback methods used by

   212       // net lib to let us know about our input stream.

   213       //*********************************************

   214     // nsIRequestObserver methods:

   215     NS_DECL_NSIREQUESTOBSERVER

   217     // nsIStreamListener methods:

   218     NS_DECL_NSISTREAMLISTENER

           // Parser-context accessors. PeekContext() returns mParserContext
           // without modifying it; Push/Pop are defined elsewhere.
   220     void              PushContext(CParserContext& aContext);

   221     CParserContext*   PopContext();

   222     CParserContext*   PeekContext() {return mParserContext;}

   224     /**

   225      * Get the channel associated with this parser

   226      * @update harishd,gagan 07/17/01

   227      * @param aChannel out param that will contain the result

   228      * @return NS_OK if successful

   229      */

   230     NS_IMETHOD GetChannel(nsIChannel** aChannel);

   232     /**

   233      * Get the DTD associated with this parser

   234      * @update vidur 9/29/99

   235      * @param aDTD out param that will contain the result

   236      * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error

   237      */

   238     NS_IMETHOD GetDTD(nsIDTD** aDTD);

   240     /**

   241      * Get the nsIStreamListener for this parser

   242      */

   243     virtual nsIStreamListener* GetStreamListener();

           // Takes a charset by non-const reference; semantics are not visible
           // in this header.
   245     void SetSinkCharset(nsACString& aCharset);

   247     /**

   248      *  Removes continue parsing events

   249      *  @update  kmcclusk 5/18/98

   250      */

   252     NS_IMETHODIMP CancelParsingEvents();

   254     /**

   255      * Return true.

   256      */

   257     virtual bool IsInsertionPointDefined();

   259     /**

   260      * No-op.

   261      */

   262     virtual void BeginEvaluatingParserInsertedScript();

   264     /**

   265      * No-op.

   266      */

   267     virtual void EndEvaluatingParserInsertedScript();

   269     /**

   270      * No-op.

   271      */

   272     virtual void MarkAsNotScriptCreated(const char* aCommand);

   274     /**

   275      * Always false.

   276      */

   277     virtual bool IsScriptCreated();

   279     /**

   280      *  Sets the parser state that indicates whether parsing tokens can be interrupted

   281      *  @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.

   282      *  @update  kmcclusk 5/18/98

   283      */

   284     void SetCanInterrupt(bool aCanInterrupt);

   286     /**

   287      * This is called when the final chunk has been

   288      * passed to the parser and the content sink has

   289      * interrupted token processing. It schedules

   290      * a ParserContinue PL_Event which will ask the parser

   291      * to HandleParserContinueEvent when it is handled.

   292      * @update	kmcclusk6/1/2001

   293      */

   294     nsresult PostContinueEvent();

   296     /**

   297      *  Fired when the continue parse event is triggered.

   298      *  @update  kmcclusk 5/18/98

   299      */

   300     void HandleParserContinueEvent(class nsParserContinueEvent *);

           /**
            * Resets the parser by calling Cleanup() and then Initialize()
            * (with its default argument, aConstructor = false).
            */
   302     virtual void Reset() {

   303       Cleanup();

   304       Initialize();

   305     }

           /**
            * True only when a sink is set and that sink reports that a script
            * is executing; false when mSink is null.
            */
   307     bool IsScriptExecuting() {

   308       return mSink && mSink->IsScriptExecuting();

   309     }

           /**
            * True only when no script is executing (see IsScriptExecuting())
            * and mProcessingNetworkData is false.
            */
   311     bool IsOkToProcessNetworkData() {

   312       return !IsScriptExecuting() && !mProcessingNetworkData;

   313     }

   315  protected:

           // Called in this order by Reset(); Initialize() defaults to
           // aConstructor = false. Definitions are not in this header.
   317     void Initialize(bool aConstructor = false);

   318     void Cleanup();

   320     /**

   321      * Presumably invoked before model building -- confirm in the definition.

   322      * @update	gess5/18/98

   323      * @param   aFilename a string, passed by non-const reference

   324      * @return  an nsresult

   325      */

   326     nsresult WillBuildModel(nsString& aFilename);

   328     /**

   329      * Presumably invoked after model building -- confirm in the definition.

   330      * @update	gess5/18/98

   331      * @param   anErrorCode an nsresult supplied by the caller

   332      * @return  an nsresult

   333      */

   334     nsresult DidBuildModel(nsresult anErrorCode);

   336 private:

   338     /*******************************************

   339       These are the tokenization methods...

   340      *******************************************/

   342     /**

   343      *  Part of the code sandwich, this gets called right before

   344      *  the tokenization process begins. The main reason for

   345      *  this call is to allow the delegate to do initialization.

   346      *

   347      *  @update  gess 3/25/98

   348      *  @param   aIsFinalChunk defaults to false

   349      *  @return  TRUE if it's ok to proceed

   350      */

   351     bool WillTokenize(bool aIsFinalChunk = false);

   354     /**

   355      *  This is the primary control routine. It iteratively

   356      *  consumes tokens until an error occurs or you run out

   357      *  of data.

   358      *

   359      *  @update  gess 3/25/98

            *  @param   aIsFinalChunk defaults to false

   360      *  @return  error code

   361      */

   362     nsresult Tokenize(bool aIsFinalChunk = false);

   364     /**

   365      * Pushes XML fragment parsing data to expat without an input stream.

            * Private overload of Parse() that takes a string buffer, not a URL.

   366      */

   367     nsresult Parse(const nsAString& aSourceBuffer,

   368                    void* aKey,

   369                    bool aLastCall);

   371 protected:

   372     //*********************************************

   373     // And now, some data members...

   374     //*********************************************

   377     CParserContext*              mParserContext;  // returned by PeekContext()

   378     nsCOMPtr<nsIDTD>             mDTD;

   379     nsCOMPtr<nsIRequestObserver> mObserver;

   380     nsCOMPtr<nsIContentSink>     mSink;  // null-checked and queried by IsScriptExecuting()

   381     nsIRunnable*                 mContinueEvent;  // weak ref

   383     eParserCommands     mCommand;

   384     nsresult            mInternalState;

   385     nsresult            mStreamStatus;

   386     int32_t             mCharsetSource;  // copied out by GetDocumentCharset()

   388     uint16_t            mFlags;

   390     nsString            mUnusedInput;

   391     nsCString           mCharset;  // copied out by GetDocumentCharset()

   392     nsCString           mCommandStr;

   394     bool                mProcessingNetworkData;  // read by IsOkToProcessNetworkData()

   395     bool                mIsAboutBlank;

   396 };

   398 #endif

The Tor Browser / file revision

parser/htmlparser/src/nsParser.h@6474c204b198

parser/htmlparser/src/nsParser.h