parser/htmlparser/src/nsParser.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/htmlparser/src/nsParser.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,399 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 + 
     1.9 +/**
    1.10 + * MODULE NOTES:
    1.11 + * 
    1.12 + *  This class does two primary jobs:
    1.13 + *    1) It iterates the tokens provided during the
    1.14 + *       tokenization process, identifying where elements
    1.15 + *       begin and end (doing validation and normalization).
    1.16 + *    2) It controls and coordinates with an instance of
    1.17 + *       the IContentSink interface, to coordinate
    1.18 + *       the production of the content model.
    1.19 + *
    1.20 + *  The basic operation of this class assumes that an HTML
    1.21 + *  document is non-normalized. Therefore, we don't process
    1.22 + *  the document in a normalized way. Don't bother to look
    1.23 + *  for methods like: doHead() or doBody().
    1.24 + *
    1.25 + *  Instead, in order to be backward compatible, we must
    1.26 + *  scan the set of tokens and perform this basic set of
    1.27 + *  operations:
    1.28 + *    1)  Determine the token type (easy, since the tokens know)
    1.29 + *    2)  Determine the appropriate section of the HTML document
    1.30 + *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
    1.31 + *    3)  Insert content into our document (via the sink) into
    1.32 + *        the correct section.
    1.33 + *    4)  In the case of tags that belong in the BODY, we must
    1.34 + *        ensure that our underlying document state reflects
    1.35 + *        the appropriate context for our tag. 
    1.36 + *
    1.37 + *        For example, if we see a <TR>, we must ensure our
    1.38 + *        document contains a table into which the row can
    1.39 + *        be placed. This may result in "implicit containers" 
    1.40 + *        created to ensure a well-formed document.
    1.41 + *         
    1.42 + */
    1.43 +
    1.44 +#ifndef NS_PARSER__
    1.45 +#define NS_PARSER__
    1.46 +
    1.47 +#include "nsIParser.h"
    1.48 +#include "nsDeque.h"
    1.49 +#include "nsIURL.h"
    1.50 +#include "CParserContext.h"
    1.51 +#include "nsParserCIID.h"
    1.52 +#include "nsITokenizer.h"
    1.53 +#include "nsHTMLTags.h"
    1.54 +#include "nsIContentSink.h"
    1.55 +#include "nsCOMArray.h"
    1.56 +#include "nsCycleCollectionParticipant.h"
    1.57 +#include "nsWeakReference.h"
    1.58 +
    1.59 +class nsIDTD;       // held by nsParser::mDTD and returned through GetDTD()
    1.60 +class nsScanner;
    1.61 +class nsIRunnable;  // type of the nsParser::mContinueEvent weak reference
    1.62 +
    1.63 +#ifdef _MSC_VER  // MSVC builds only; NOTE(review): C4275 is presumably the "non dll-interface class used as base" warning -- confirm
    1.64 +#pragma warning( disable : 4275 )
    1.65 +#endif  // _MSC_VER
    1.66 +
    1.67 +
    1.68 +class nsParser : public nsIParser,  // nsIParser implementation; holds the parser context, DTD and content sink members declared below
    1.69 +                 public nsIStreamListener,  // stream callbacks are declared via NS_DECL_NSISTREAMLISTENER below
    1.70 +                 public nsSupportsWeakReference
    1.71 +{
    1.72 +  public:
    1.73 +    /**
    1.74 +     * Called on module init
    1.75 +     */
    1.76 +    static nsresult Init();
    1.77 +
    1.78 +    /**
    1.79 +     * Called on module shutdown
    1.80 +     */
    1.81 +    static void Shutdown();
    1.82 +
    1.83 +    NS_DECL_CYCLE_COLLECTING_ISUPPORTS
    1.84 +    NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
    1.85 +
    1.86 +    /**
    1.87 +     * default constructor
    1.88 +     * @update  gess 5/11/98
    1.89 +     */
    1.90 +    nsParser();
    1.91 +
    1.92 +    /**
    1.93 +     * Destructor
    1.94 +     * @update  gess 5/11/98
    1.95 +     */
    1.96 +    virtual ~nsParser();
    1.97 +
    1.98 +    /**
    1.99 +     * Select given content sink into parser for parser output
   1.100 +     * @update  gess 5/11/98
   1.101 +     * @param   aSink is the new sink to be used by parser
   1.102 +     * @return  nothing (declared void)
   1.103 +     */
   1.104 +    NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
   1.105 +
   1.106 +    /**
   1.107 +     * Retrieve the sink set into the parser
   1.108 +     * @update  gess 5/11/98
   1.109 +     * (takes no parameters)
   1.110 +     * @return  an nsIContentSink pointer; presumably the sink given to SetContentSink() -- confirm in nsParser.cpp
   1.111 +     */
   1.112 +    NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
   1.113 +    
   1.114 +    /**
   1.115 +     *  Call this method once you've created a parser, and want to instruct it
   1.116 +     *  about the command which caused the parser to be constructed. For example,
   1.117 +     *  this allows us to select a DTD which can do, say, view-source.
   1.118 +     *  SetCommand() is overloaded for a C string and for an eParserCommands value.
   1.119 +     *  @update  gess 3/25/98
   1.120 +     *  @param   aCommand -- pointer to string that contains command
   1.121 +     *  @return  nothing
   1.122 +     */
   1.123 +    NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
   1.124 +    NS_IMETHOD_(void) SetCommand(const char* aCommand);
   1.125 +    NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
   1.126 +
   1.127 +    /**
   1.128 +     *  Call this method once you've created a parser, and want to instruct it
   1.129 +     *  about what charset to load
   1.130 +     *  
   1.131 +     *  @update  ftang 4/23/99
   1.132 +     *  @param   aCharset- the charset of a document
   1.133 +     *  @param   aSource- the source of the charset
   1.134 +     *  @return  nothing
   1.135 +     */
   1.136 +    NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
   1.137 +    // Copies mCharset and mCharsetSource into the two out-params.
   1.138 +    NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource)
   1.139 +    {
   1.140 +         aCharset = mCharset;
   1.141 +         aSource = mCharsetSource;
   1.142 +    }
   1.143 +
   1.144 +    /**
   1.145 +     * Cause parser to parse input from given URL
   1.146 +     * @update  gess 5/11/98
   1.147 +     * @param   aURL is a descriptor for source document
   1.148 +     * @param   aListener is a listener to forward notifications to
   1.149 +     * @return  an nsresult status code (per NS_IMETHOD), not a boolean
   1.150 +     */
   1.151 +    NS_IMETHOD Parse(nsIURI* aURL,
   1.152 +                     nsIRequestObserver* aListener = nullptr,
   1.153 +                     void* aKey = 0,
   1.154 +                     nsDTDMode aMode = eDTDMode_autodetect);
   1.155 +
   1.156 +    /**
   1.157 +     * NOTE(review): undocumented. Presumably parses aSourceBuffer as a fragment using aTagStack as context -- confirm.
   1.158 +     */
   1.159 +    NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
   1.160 +                             nsTArray<nsString>& aTagStack);
   1.161 +                             
   1.162 +    /**
   1.163 +     * This method gets called when the tokens have been consumed, and it's time
   1.164 +     * to build the model via the content sink.
   1.165 +     * @update  gess 5/11/98
   1.166 +     * @return  an nsresult status code (per NS_IMETHOD), not a boolean
   1.167 +     */
   1.168 +    NS_IMETHOD BuildModel(void);
   1.169 +    // NOTE(review): the five methods below are presumably nsIParser overrides -- see nsIParser.h for their contracts.
   1.170 +    NS_IMETHOD        ContinueInterruptedParsing();
   1.171 +    NS_IMETHOD_(void) BlockParser();
   1.172 +    NS_IMETHOD_(void) UnblockParser();
   1.173 +    NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
   1.174 +    NS_IMETHOD        Terminate(void);
   1.175 +
   1.176 +    /**
   1.177 +     * Call this to query whether the parser is enabled or not.
   1.178 +     *
   1.179 +     *  @update  vidur 4/12/99
   1.180 +     *  @return  current state
   1.181 +     */
   1.182 +    NS_IMETHOD_(bool) IsParserEnabled();
   1.183 +
   1.184 +    /**
   1.185 +     * Call this to query whether the parser thinks it's done with parsing.
   1.186 +     *
   1.187 +     *  @update  rickg 5/12/01
   1.188 +     *  @return  complete state
   1.189 +     */
   1.190 +    NS_IMETHOD_(bool) IsComplete();
   1.191 +
   1.192 +    /**
   1.193 +     *  This rather arcane method (hack) is used as a signal between the
   1.194 +     *  DTD and the parser. It allows the DTD to tell the parser that content
   1.195 +     *  that comes through (parser::parser(string)) but not consumed should
   1.196 +     *  propagate into the next string based parse call.
   1.197 +     *  
   1.198 +     *  @update  gess 9/1/98
   1.199 +     *  @param   aBuffer the unused string content (presumably stored in mUnusedInput -- confirm)
   1.200 +     *  @return  nothing (declared void)
   1.201 +     */
   1.202 +    void SetUnusedInput(nsString& aBuffer);
   1.203 +
   1.204 +    /**
   1.205 +     * This method gets called (automatically) during incremental parsing
   1.206 +     * @update  gess 5/11/98
   1.207 +     * @return  an nsresult status code, not a boolean
   1.208 +     */
   1.209 +    virtual nsresult ResumeParse(bool allowIteration = true, 
   1.210 +                                 bool aIsFinalChunk = false,
   1.211 +                                 bool aCanInterrupt = true);
   1.212 +
   1.213 +    //*********************************************
   1.214 +    // These methods are callback methods used by
   1.215 +    // net lib to let us know about our inputstream.
   1.216 +    //*********************************************
   1.217 +    // nsIRequestObserver methods:
   1.218 +    NS_DECL_NSIREQUESTOBSERVER
   1.219 +
   1.220 +    // nsIStreamListener methods:
   1.221 +    NS_DECL_NSISTREAMLISTENER
   1.222 +    // Parser context accessors; PeekContext() returns mParserContext without modifying it.
   1.223 +    void              PushContext(CParserContext& aContext);
   1.224 +    CParserContext*   PopContext();
   1.225 +    CParserContext*   PeekContext() {return mParserContext;}
   1.226 +
   1.227 +    /** 
   1.228 +     * Get the channel associated with this parser
   1.229 +     * @update harishd,gagan 07/17/01
   1.230 +     * @param aChannel out param that will contain the result
   1.231 +     * @return NS_OK if successful
   1.232 +     */
   1.233 +    NS_IMETHOD GetChannel(nsIChannel** aChannel);
   1.234 +
   1.235 +    /** 
   1.236 +     * Get the DTD associated with this parser
   1.237 +     * @update vidur 9/29/99
   1.238 +     * @param aDTD out param that will contain the result
   1.239 +     * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
   1.240 +     */
   1.241 +    NS_IMETHOD GetDTD(nsIDTD** aDTD);
   1.242 +  
   1.243 +    /**
   1.244 +     * Get the nsIStreamListener for this parser
   1.245 +     */
   1.246 +    virtual nsIStreamListener* GetStreamListener();
   1.247 +
   1.248 +    void SetSinkCharset(nsACString& aCharset);
   1.249 +
   1.250 +    /**
   1.251 +     *  Removes continue parsing events
   1.252 +     *  @update  kmcclusk 5/18/98
   1.253 +     */
   1.254 +
   1.255 +    NS_IMETHODIMP CancelParsingEvents();
   1.256 +
   1.257 +    /**
   1.258 +     * Return true.
   1.259 +     */
   1.260 +    virtual bool IsInsertionPointDefined();
   1.261 +
   1.262 +    /**
   1.263 +     * No-op.
   1.264 +     */
   1.265 +    virtual void BeginEvaluatingParserInsertedScript();
   1.266 +
   1.267 +    /**
   1.268 +     * No-op.
   1.269 +     */
   1.270 +    virtual void EndEvaluatingParserInsertedScript();
   1.271 +
   1.272 +    /**
   1.273 +     * No-op.
   1.274 +     */
   1.275 +    virtual void MarkAsNotScriptCreated(const char* aCommand);
   1.276 +
   1.277 +    /**
   1.278 +     * Always false.
   1.279 +     */
   1.280 +    virtual bool IsScriptCreated();
   1.281 +
   1.282 +    /**  
   1.283 +     *  Set the parser state to indicate whether parsing tokens can be interrupted
   1.284 +     *  @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
   1.285 +     *  @update  kmcclusk 5/18/98
   1.286 +     */
   1.287 +    void SetCanInterrupt(bool aCanInterrupt);
   1.288 +
   1.289 +    /**
   1.290 +     * This is called when the final chunk has been
   1.291 +     * passed to the parser and the content sink has
   1.292 +     * interrupted token processing. It schedules
   1.293 +     * a ParserContinue PL_Event which will ask the parser
   1.294 +     * to HandleParserContinueEvent when it is handled.
   1.295 +     * @update  kmcclusk 6/1/2001
   1.296 +     */
   1.297 +    nsresult PostContinueEvent();
   1.298 +
   1.299 +    /**
   1.300 +     *  Fired when the continue parse event is triggered.
   1.301 +     *  @update  kmcclusk 5/18/98
   1.302 +     */
   1.303 +    void HandleParserContinueEvent(class nsParserContinueEvent *);
   1.304 +    // Calls Cleanup() and then Initialize() with its default argument (aConstructor = false).
   1.305 +    virtual void Reset() {
   1.306 +      Cleanup();
   1.307 +      Initialize();
   1.308 +    }
   1.309 +    // True only when a sink is set and that sink reports that a script is executing.
   1.310 +    bool IsScriptExecuting() {
   1.311 +      return mSink && mSink->IsScriptExecuting();
   1.312 +    }
   1.313 +    // True when no script is executing and mProcessingNetworkData is false.
   1.314 +    bool IsOkToProcessNetworkData() {
   1.315 +      return !IsScriptExecuting() && !mProcessingNetworkData;
   1.316 +    }
   1.317 +
   1.318 + protected:
   1.319 +    // Reset() calls these back-to-back: Cleanup() first, then Initialize().
   1.320 +    void Initialize(bool aConstructor = false);
   1.321 +    void Cleanup();
   1.322 +
   1.323 +    /**
   1.324 +     * NOTE(review): no description was provided for this method.
   1.325 +     * @update  gess 5/18/98
   1.326 +     * @param   aFilename (meaning not documented in this header)
   1.327 +     * @return  an nsresult status code
   1.328 +     */
   1.329 +    nsresult WillBuildModel(nsString& aFilename);
   1.330 +
   1.331 +    /**
   1.332 +     * NOTE(review): no description was provided for this method.
   1.333 +     * @update  gess 5/18/98
   1.334 +     * @param   anErrorCode (meaning not documented in this header)
   1.335 +     * @return  an nsresult status code
   1.336 +     */
   1.337 +    nsresult DidBuildModel(nsresult anErrorCode);
   1.338 +
   1.339 +private:
   1.340 +
   1.341 +    /*******************************************
   1.342 +      These are the tokenization methods...
   1.343 +     *******************************************/
   1.344 +
   1.345 +    /**
   1.346 +     *  Part of the code sandwich, this gets called right before
   1.347 +     *  the tokenization process begins. The main reason for
   1.348 +     *  this call is to allow the delegate to do initialization.
   1.349 +     *  
   1.350 +     *  @update  gess 3/25/98
   1.351 +     *  @param   aIsFinalChunk defaults to false (meaning not documented in this header)
   1.352 +     *  @return  true if it's ok to proceed
   1.353 +     */
   1.354 +    bool WillTokenize(bool aIsFinalChunk = false);
   1.355 +
   1.356 +   
   1.357 +    /**
   1.358 +     *  This is the primary control routine. It iteratively
   1.359 +     *  consumes tokens until an error occurs or you run out
   1.360 +     *  of data.
   1.361 +     *  
   1.362 +     *  @update  gess 3/25/98
   1.363 +     *  @return  error code 
   1.364 +     */
   1.365 +    nsresult Tokenize(bool aIsFinalChunk = false);
   1.366 +
   1.367 +    /**
   1.368 +     * Pushes XML fragment parsing data to expat without an input stream.
   1.369 +     */
   1.370 +    nsresult Parse(const nsAString& aSourceBuffer,
   1.371 +                   void* aKey,
   1.372 +                   bool aLastCall);
   1.373 +
   1.374 +protected:
   1.375 +    //*********************************************
   1.376 +    // And now, some data members...
   1.377 +    //*********************************************
   1.378 +    
   1.379 +      
   1.380 +    CParserContext*              mParserContext;  // returned by PeekContext()
   1.381 +    nsCOMPtr<nsIDTD>             mDTD;
   1.382 +    nsCOMPtr<nsIRequestObserver> mObserver;
   1.383 +    nsCOMPtr<nsIContentSink>     mSink;           // queried by IsScriptExecuting()
   1.384 +    nsIRunnable*                 mContinueEvent;  // weak ref
   1.385 +
   1.386 +    eParserCommands     mCommand;
   1.387 +    nsresult            mInternalState;
   1.388 +    nsresult            mStreamStatus;
   1.389 +    int32_t             mCharsetSource;  // reported by GetDocumentCharset()
   1.390 +    
   1.391 +    uint16_t            mFlags;
   1.392 +
   1.393 +    nsString            mUnusedInput;
   1.394 +    nsCString           mCharset;        // reported by GetDocumentCharset()
   1.395 +    nsCString           mCommandStr;
   1.396 +
   1.397 +    bool                mProcessingNetworkData;  // while true, IsOkToProcessNetworkData() returns false
   1.398 +    bool                mIsAboutBlank;
   1.399 +};
   1.400 +
   1.401 +#endif 
   1.402 +

mercurial