parser/htmlparser/src/nsParser.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /**
     7  * MODULE NOTES:
     8  * 
     9  *  This class does two primary jobs:
    10  *    1) It iterates the tokens provided during the 
    11  *       tokenization process, identifying where elements
    12  *       begin and end (doing validation and normalization).
    13  *    2) It controls and coordinates with an instance of
    14  *       the IContentSink interface, to coordinate the
    15  *       production of the content model.
    16  *
    17  *  The basic operation of this class assumes that an HTML
    18  *  document is non-normalized. Therefore, we don't process
    19  *  the document in a normalized way. Don't bother to look
    20  *  for methods like: doHead() or doBody().
    21  *
    22  *  Instead, in order to be backward compatible, we must
    23  *  scan the set of tokens and perform this basic set of
    24  *  operations:
    25  *    1)  Determine the token type (easy, since the tokens know)
    26  *    2)  Determine the appropriate section of the HTML document
    27  *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
    28  *    3)  Insert content into our document (via the sink) into
    29  *        the correct section.
    30  *    4)  In the case of tags that belong in the BODY, we must
    31  *        ensure that our underlying document state reflects
    32  *        the appropriate context for our tag. 
    33  *
    34  *        For example, if we see a <TR>, we must ensure our
    35  *        document contains a table into which the row can
    36  *        be placed. This may result in "implicit containers" 
    37  *        created to ensure a well-formed document.
    38  *         
    39  */
    41 #ifndef NS_PARSER__
    42 #define NS_PARSER__
    44 #include "nsIParser.h"
    45 #include "nsDeque.h"
    46 #include "nsIURL.h"
    47 #include "CParserContext.h"
    48 #include "nsParserCIID.h"
    49 #include "nsITokenizer.h"
    50 #include "nsHTMLTags.h"
    51 #include "nsIContentSink.h"
    52 #include "nsCOMArray.h"
    53 #include "nsCycleCollectionParticipant.h"
    54 #include "nsWeakReference.h"
    56 class nsIDTD;       // forward declaration; used via nsCOMPtr<nsIDTD> and nsIDTD** below
    57 class nsScanner;    // forward declaration
    58 class nsIRunnable;  // forward declaration; nsParser keeps an nsIRunnable* member
    60 #ifdef _MSC_VER
    61 #pragma warning( disable : 4275 )  // MSVC only: silence warning C4275 (presumably triggered by nsParser's base classes -- confirm)
    62 #endif
    65 class nsParser : public nsIParser,
    66                  public nsIStreamListener,
    67                  public nsSupportsWeakReference
    68 {
       // nsParser implements nsIParser and nsIStreamListener and supports weak
       // references (nsSupportsWeakReference). Its state -- the current parser
       // context, DTD, observer, content sink, charset and command -- is held in
       // the data members declared at the end of the class.
    69   public:
    70     /**
    71      * Called on module init
    72      */
    73     static nsresult Init();
    75     /**
    76      * Called on module shutdown
    77      */
    78     static void Shutdown();
    80     NS_DECL_CYCLE_COLLECTING_ISUPPORTS
    81     NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
    83     /**
    84      * Default constructor
    85      * @update  gess 5/11/98
    86      */
    87     nsParser();
    89     /**
    90      * Destructor
    91      * @update  gess 5/11/98
    92      */
    93     virtual ~nsParser();
    95     /**
    96      * Select given content sink into parser for parser output
    97      * @update  gess 5/11/98
    98      * @param   aSink is the new sink to be used by parser
    99      * @return  nothing (the method is declared void)
   100      */
   101     NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
   103     /**
   104      * Retrieve the sink set into the parser
   105      * @update  gess 5/11/98
   106      * @return  the content sink pointer; takes no parameters
   107      *          (presumably nullptr when no sink has been set -- confirm)
   108      */
   109     NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
   111     /**
   112      *  Call SetCommand once you've created a parser, and want to instruct it
   113      *  about the command which caused the parser to be constructed. For example,
   114      *  this allows us to select a DTD which can do, say, view-source.
             *  The command can be given either as a string or as an eParserCommands
             *  value. GetCommand takes an nsCString& (presumably filled with the
             *  current command string -- confirm in the implementation).
   115      *
   116      *  @update  gess 3/25/98
   117      *  @param   aCommand -- ptrs to string that contains command
   118      *  @return  nothing (all three methods are declared void)
   119      */
   120     NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
   121     NS_IMETHOD_(void) SetCommand(const char* aCommand);
   122     NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
   124     /**
   125      *  Call this method once you've created a parser, and want to instruct it
   126      *  about what charset to load
   127      *
   128      *  @update  ftang 4/23/99
   129      *  @param   aCharset- the charset of a document
   130      *  @param   aSource- the source of the charset
   131      *  @return  nothing (the method is declared void)
   132      */
   133     NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
           /**
            *  Copy the stored charset (mCharset) and charset source
            *  (mCharsetSource) into the two out parameters.
            */
   135     NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource)
   136     {
   137          aCharset = mCharset;
   138          aSource = mCharsetSource;
   139     }
   141     /**
   142      * Cause parser to parse input from given URL
   143      * @update  gess 5/11/98
   144      * @param   aURL is a descriptor for source document
   145      * @param   aListener is a listener to forward notifications to
             *          (defaults to nullptr)
             * @param   aKey defaults to 0 (NOTE(review): meaning not visible in
             *          this header -- confirm against the implementation)
             * @param   aMode the DTD mode; defaults to eDTDMode_autodetect
   146      * @return  an nsresult status code (not a boolean)
   147      */
   148     NS_IMETHOD Parse(nsIURI* aURL,
   149                      nsIRequestObserver* aListener = nullptr,
   150                      void* aKey = 0,
   151                      nsDTDMode aMode = eDTDMode_autodetect);
   153     /**
   154      * Parse a fragment supplied as a string.
             * @param   aSourceBuffer the source text of the fragment
             * @param   aTagStack array of strings (presumably the names of the
             *          enclosing tags -- confirm against the implementation)
             * @return  an nsresult status code
   155      */
   156     NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
   157                              nsTArray<nsString>& aTagStack);
   159     /**
   160      * This method gets called when the tokens have been consumed, and it's time
   161      * to build the model via the content sink.
   162      * @update  gess 5/11/98
   163      * @return  an nsresult status code (not a boolean)
   164      */
   165     NS_IMETHOD BuildModel(void);
   167     NS_IMETHOD        ContinueInterruptedParsing();
   168     NS_IMETHOD_(void) BlockParser();
   169     NS_IMETHOD_(void) UnblockParser();
   170     NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
   171     NS_IMETHOD        Terminate(void);
   173     /**
   174      * Call this to query whether the parser is enabled or not.
   175      *
   176      *  @update  vidur 4/12/99
   177      *  @return  current state
   178      */
   179     NS_IMETHOD_(bool) IsParserEnabled();
   181     /**
   182      * Call this to query whether the parser thinks it's done with parsing.
   183      *
   184      *  @update  rickg 5/12/01
   185      *  @return  complete state
   186      */
   187     NS_IMETHOD_(bool) IsComplete();
   189     /**
   190      *  This rather arcane method (hack) is used as a signal between the
   191      *  DTD and the parser. It allows the DTD to tell the parser that content
   192      *  that comes through (parser::parser(string)) but not consumed should
   193      *  propagate into the next string based parse call.
   194      *
   195      *  @update  gess 9/1/98
   196      *  @param   aBuffer the unused string content to propagate
   197      *  @return  nothing (the method is declared void)
   198      */
   199     void SetUnusedInput(nsString& aBuffer);
   201     /**
   202      * This method gets called (automatically) during incremental parsing
   203      * @update  gess 5/11/98
             * @param   allowIteration defaults to true
             * @param   aIsFinalChunk defaults to false
             * @param   aCanInterrupt defaults to true
   204      * @return  an nsresult status code (not a boolean)
   205      */
   206     virtual nsresult ResumeParse(bool allowIteration = true, 
   207                                  bool aIsFinalChunk = false,
   208                                  bool aCanInterrupt = true);
   210      //*********************************************
   211       // These methods are callback methods used by
   212       // net lib to let us know about our inputstream.
   213       //*********************************************
   214     // nsIRequestObserver methods:
   215     NS_DECL_NSIREQUESTOBSERVER
   217     // nsIStreamListener methods:
   218     NS_DECL_NSISTREAMLISTENER
           /**
            * Parser context accessors. PeekContext returns the current
            * mParserContext pointer without modifying it.
            */
   220     void              PushContext(CParserContext& aContext);
   221     CParserContext*   PopContext();
   222     CParserContext*   PeekContext() {return mParserContext;}
   224     /**
   225      * Get the channel associated with this parser
   226      * @update harishd,gagan 07/17/01
   227      * @param aChannel out param that will contain the result
   228      * @return NS_OK if successful
   229      */
   230     NS_IMETHOD GetChannel(nsIChannel** aChannel);
   232     /**
   233      * Get the DTD associated with this parser
   234      * @update vidur 9/29/99
   235      * @param aDTD out param that will contain the result
   236      * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
   237      */
   238     NS_IMETHOD GetDTD(nsIDTD** aDTD);
   240     /**
   241      * Get the nsIStreamListener for this parser
   242      */
   243     virtual nsIStreamListener* GetStreamListener();
           /**
            * NOTE(review): undocumented in the original; presumably passes the
            * given charset on to the content sink -- confirm in the implementation.
            */
   245     void SetSinkCharset(nsACString& aCharset);
   247     /**
   248      *  Removes continue parsing events
   249      *  @update  kmcclusk 5/18/98
   250      */
   252     NS_IMETHODIMP CancelParsingEvents();
   254     /**
   255      * Return true.
   256      */
   257     virtual bool IsInsertionPointDefined();
   259     /**
   260      * No-op.
   261      */
   262     virtual void BeginEvaluatingParserInsertedScript();
   264     /**
   265      * No-op.
   266      */
   267     virtual void EndEvaluatingParserInsertedScript();
   269     /**
   270      * No-op.
   271      */
   272     virtual void MarkAsNotScriptCreated(const char* aCommand);
   274     /**
   275      * Always false.
   276      */
   277     virtual bool IsScriptCreated();
   279     /**
   280      *  Set the parser state to indicate whether parsing tokens can be interrupted
   281      *  @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
   282      *  @update  kmcclusk 5/18/98
   283      */
   284     void SetCanInterrupt(bool aCanInterrupt);
   286     /**
   287      * This is called when the final chunk has been
   288      * passed to the parser and the content sink has
   289      * interrupted token processing. It schedules
   290      * a ParserContinue PL_Event which will ask the parser
   291      * to HandleParserContinueEvent when it is handled.
   292      * @update  kmcclusk 6/1/2001
   293      */
   294     nsresult PostContinueEvent();
   296     /**
   297      *  Fired when the continue parse event is triggered.
   298      *  @update  kmcclusk 5/18/98
   299      */
   300     void HandleParserContinueEvent(class nsParserContinueEvent *);
           /**
            * Reset the parser by calling Cleanup() followed by Initialize()
            * (with its default argument, aConstructor = false).
            */
   302     virtual void Reset() {
   303       Cleanup();
   304       Initialize();
   305     }
           /**
            * @return true only when a sink is set and that sink reports that a
            *         script is executing; false when mSink is null.
            */
   307     bool IsScriptExecuting() {
   308       return mSink && mSink->IsScriptExecuting();
   309     }
           /**
            * @return true when no script is executing (see IsScriptExecuting)
            *         and mProcessingNetworkData is false.
            */
   311     bool IsOkToProcessNetworkData() {
   312       return !IsScriptExecuting() && !mProcessingNetworkData;
   313     }
   315  protected:
           // Initialize and Cleanup are the two steps invoked, in reverse order,
           // by Reset(). aConstructor defaults to false (presumably true only
           // when called from the constructor -- confirm).
   317     void Initialize(bool aConstructor = false);
   318     void Cleanup();
   320     /**
   321      * NOTE(review): undocumented in the original; by its name, presumably
             * called before model building starts -- confirm.
   322      * @update  gess 5/18/98
   323      * @param   aFilename a string reference (exact use not visible here)
   324      * @return  an nsresult status code
   325      */
   326     nsresult WillBuildModel(nsString& aFilename);
   328     /**
   329      * NOTE(review): undocumented in the original; by its name, presumably
             * called after model building has finished -- confirm.
   330      * @update  gess 5/18/98
   331      * @param   anErrorCode an nsresult (presumably the outcome of model building)
   332      * @return  an nsresult status code
   333      */
   334     nsresult DidBuildModel(nsresult anErrorCode);
   336 private:
   338     /*******************************************
   339       These are the tokenization methods...
   340      *******************************************/
   342     /**
   343      *  Part of the code sandwich, this gets called right before
   344      *  the tokenization process begins. The main reason for
   345      *  this call is to allow the delegate to do initialization.
   346      *
   347      *  @update  gess 3/25/98
   348      *  @param   aIsFinalChunk defaults to false
   349      *  @return  TRUE if it's ok to proceed
   350      */
   351     bool WillTokenize(bool aIsFinalChunk = false);
   354     /**
   355      *  This is the primary control routine. It iteratively
   356      *  consumes tokens until an error occurs or you run out
   357      *  of data.
   358      *
   359      *  @update  gess 3/25/98
             *  @param   aIsFinalChunk defaults to false
   360      *  @return  error code
   361      */
   362     nsresult Tokenize(bool aIsFinalChunk = false);
   364     /**
   365      * Pushes XML fragment parsing data to expat without an input stream.
             * @param   aSourceBuffer the source text to parse
             * @param   aKey NOTE(review): meaning not visible in this header
             * @param   aLastCall presumably true for the final buffer -- confirm
             * @return  an nsresult status code
   366      */
   367     nsresult Parse(const nsAString& aSourceBuffer,
   368                    void* aKey,
   369                    bool aLastCall);
   371 protected:
   372     //*********************************************
   373     // And now, some data members...
   374     //*********************************************
   377     CParserContext*              mParserContext;  // current context; returned by PeekContext()
   378     nsCOMPtr<nsIDTD>             mDTD;
   379     nsCOMPtr<nsIRequestObserver> mObserver;
   380     nsCOMPtr<nsIContentSink>     mSink;  // queried by IsScriptExecuting()
   381     nsIRunnable*                 mContinueEvent;  // weak ref
   383     eParserCommands     mCommand;
   384     nsresult            mInternalState;
   385     nsresult            mStreamStatus;
   386     int32_t             mCharsetSource;  // reported by GetDocumentCharset()
   388     uint16_t            mFlags;
   390     nsString            mUnusedInput;
   391     nsCString           mCharset;  // reported by GetDocumentCharset()
   392     nsCString           mCommandStr;
   394     bool                mProcessingNetworkData;  // read by IsOkToProcessNetworkData()
   395     bool                mIsAboutBlank;
   396 };
   398 #endif 

mercurial