1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/htmlparser/src/nsParser.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,399 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/** 1.10 + * MODULE NOTES: 1.11 + * 1.12 + * This class does two primary jobs: 1.13 + * 1) It iterates the tokens provided during the 1.14 + * tokenization process, identifying where elements 1.15 + * begin and end (doing validation and normalization). 1.16 + * 2) It controls and coordinates with an instance of 1.17 + * the IContentSink interface, to coordinate the 1.18 + * production of the content model. 1.19 + * 1.20 + * The basic operation of this class assumes that an HTML 1.21 + * document is non-normalized. Therefore, we don't process 1.22 + * the document in a normalized way. Don't bother to look 1.23 + * for methods like: doHead() or doBody(). 1.24 + * 1.25 + * Instead, in order to be backward compatible, we must 1.26 + * scan the set of tokens and perform this basic set of 1.27 + * operations: 1.28 + * 1) Determine the token type (easy, since the tokens know) 1.29 + * 2) Determine the appropriate section of the HTML document 1.30 + * each token belongs in (HTML,HEAD,BODY,FRAMESET). 1.31 + * 3) Insert content into our document (via the sink) into 1.32 + * the correct section. 1.33 + * 4) In the case of tags that belong in the BODY, we must 1.34 + * ensure that our underlying document state reflects 1.35 + * the appropriate context for our tag. 1.36 + * 1.37 + * For example, if we see a <TR>, we must ensure our 1.38 + * document contains a table into which the row can 1.39 + * be placed. This may result in "implicit containers" 1.40 + * created to ensure a well-formed document. 
1.41 + * 1.42 + */ 1.43 + 1.44 +#ifndef NS_PARSER__ 1.45 +#define NS_PARSER__ 1.46 + 1.47 +#include "nsIParser.h" 1.48 +#include "nsDeque.h" 1.49 +#include "nsIURL.h" 1.50 +#include "CParserContext.h" 1.51 +#include "nsParserCIID.h" 1.52 +#include "nsITokenizer.h" 1.53 +#include "nsHTMLTags.h" 1.54 +#include "nsIContentSink.h" 1.55 +#include "nsCOMArray.h" 1.56 +#include "nsCycleCollectionParticipant.h" 1.57 +#include "nsWeakReference.h" 1.58 + 1.59 +class nsIDTD; 1.60 +class nsScanner; 1.61 +class nsIRunnable; 1.62 + 1.63 +#ifdef _MSC_VER 1.64 +#pragma warning( disable : 4275 ) 1.65 +#endif 1.66 + 1.67 + 1.68 +class nsParser : public nsIParser, 1.69 + public nsIStreamListener, 1.70 + public nsSupportsWeakReference 1.71 +{ 1.72 + public: 1.73 + /** 1.74 + * Called on module init 1.75 + */ 1.76 + static nsresult Init(); 1.77 + 1.78 + /** 1.79 + * Called on module shutdown 1.80 + */ 1.81 + static void Shutdown(); 1.82 + 1.83 + NS_DECL_CYCLE_COLLECTING_ISUPPORTS 1.84 + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser) 1.85 + 1.86 + /** 1.87 + * default constructor 1.88 + * @update gess5/11/98 1.89 + */ 1.90 + nsParser(); 1.91 + 1.92 + /** 1.93 + * Destructor 1.94 + * @update gess5/11/98 1.95 + */ 1.96 + virtual ~nsParser(); 1.97 + 1.98 + /** 1.99 + * Select given content sink into parser for parser output 1.100 + * @update gess5/11/98 1.101 + * @param aSink is the new sink to be used by parser 1.102 + * @return nothing (the method is declared void) 1.103 + */ 1.104 + NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink); 1.105 + 1.106 + /** 1.107 + * retrieve the sink set into the parser 1.108 + * @update gess5/11/98 1.109 + * (takes no parameters) 1.110 + * @return the sink set into the parser (NOTE(review): presumably nullptr when none is set -- confirm in nsParser.cpp) 1.111 + */ 1.112 + NS_IMETHOD_(nsIContentSink*) GetContentSink(void); 1.113 + 1.114 + /** 1.115 + * Call this method once you've created a parser, and want to instruct it 1.116 + * about the command which caused the parser to be constructed. 
For example, 1.117 + * this allows us to select a DTD which can do, say, view-source. 1.118 + * 1.119 + * @update gess 3/25/98 1.120 + * @param aCommand -- string that contains the command (the enum overload takes it as aParserCommand instead) 1.121 + * @return nada 1.122 + */ 1.123 + NS_IMETHOD_(void) GetCommand(nsCString& aCommand); 1.124 + NS_IMETHOD_(void) SetCommand(const char* aCommand); 1.125 + NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand); 1.126 + 1.127 + /** 1.128 + * Call this method once you've created a parser, and want to instruct it 1.129 + * about what charset to load 1.130 + * 1.131 + * @update ftang 4/23/99 1.132 + * @param aCharset- the charset of a document 1.133 + * @param aSource- the source of the charset 1.134 + * @return nada 1.135 + */ 1.136 + NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource); 1.137 + /* Copies mCharset into aCharset and mCharsetSource into aSource. */ 1.138 + NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) 1.139 + { 1.140 + aCharset = mCharset; 1.141 + aSource = mCharsetSource; 1.142 + } 1.143 + 1.144 + /** 1.145 + * Cause parser to parse input from given URL 1.146 + * @update gess5/11/98 1.147 + * @param aURL is a descriptor for source document 1.148 + * @param aListener is a listener to forward notifications to 1.149 + * @return nsresult status (the method is declared NS_IMETHOD); not a boolean 1.150 + */ 1.151 + NS_IMETHOD Parse(nsIURI* aURL, 1.152 + nsIRequestObserver* aListener = nullptr, 1.153 + void* aKey = 0, 1.154 + nsDTDMode aMode = eDTDMode_autodetect); 1.155 + 1.156 + /** 1.157 + * NOTE(review): undocumented upstream. Takes the source text in aSourceBuffer and an array of strings in aTagStack; the exact semantics are not visible in this header -- confirm in nsParser.cpp 1.158 + */ 1.159 + NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer, 1.160 + nsTArray<nsString>& aTagStack); 1.161 + 1.162 + /** 1.163 + * This method gets called when the tokens have been consumed, and it's time 1.164 + * to build the model via the content sink. 1.165 + * @update gess5/11/98 1.166 + * @return nsresult status of model building (the method is declared NS_IMETHOD). 
1.167 + */ 1.168 + NS_IMETHOD BuildModel(void); 1.169 + 1.170 + NS_IMETHOD ContinueInterruptedParsing(); 1.171 + NS_IMETHOD_(void) BlockParser(); 1.172 + NS_IMETHOD_(void) UnblockParser(); 1.173 + NS_IMETHOD_(void) ContinueInterruptedParsingAsync(); 1.174 + NS_IMETHOD Terminate(void); 1.175 + 1.176 + /** 1.177 + * Call this to query whether the parser is enabled or not. 1.178 + * 1.179 + * @update vidur 4/12/99 1.180 + * @return current state 1.181 + */ 1.182 + NS_IMETHOD_(bool) IsParserEnabled(); 1.183 + 1.184 + /** 1.185 + * Call this to query whether the parser thinks it's done with parsing. 1.186 + * 1.187 + * @update rickg 5/12/01 1.188 + * @return complete state 1.189 + */ 1.190 + NS_IMETHOD_(bool) IsComplete(); 1.191 + 1.192 + /** 1.193 + * This rather arcane method (hack) is used as a signal between the 1.194 + * DTD and the parser. It allows the DTD to tell the parser that content 1.195 + * that comes through (parser::parser(string)) but not consumed should 1.196 + * propagate into the next string based parse call. 1.197 + * 1.198 + * @update gess 9/1/98 1.199 + * @param aBuffer the unused string content (NOTE(review): presumably kept in mUnusedInput -- confirm in nsParser.cpp) 1.200 + * @return nothing (the method is declared void) 1.201 + */ 1.202 + void SetUnusedInput(nsString& aBuffer); 1.203 + 1.204 + /** 1.205 + * This method gets called (automatically) during incremental parsing 1.206 + * @update gess5/11/98 1.207 + * @return nsresult status, not a boolean 1.208 + */ 1.209 + virtual nsresult ResumeParse(bool allowIteration = true, 1.210 + bool aIsFinalChunk = false, 1.211 + bool aCanInterrupt = true); 1.212 + 1.213 + //********************************************* 1.214 + // These methods are callback methods used by 1.215 + // net lib to let us know about our inputstream. 
1.216 + //********************************************* 1.217 + // nsIRequestObserver methods: 1.218 + NS_DECL_NSIREQUESTOBSERVER 1.219 + 1.220 + // nsIStreamListener methods: 1.221 + NS_DECL_NSISTREAMLISTENER 1.222 + /* Parser context accessors; PeekContext returns mParserContext. */ 1.223 + void PushContext(CParserContext& aContext); 1.224 + CParserContext* PopContext(); 1.225 + CParserContext* PeekContext() {return mParserContext;} 1.226 + 1.227 + /** 1.228 + * Get the channel associated with this parser 1.229 + * @update harishd,gagan 07/17/01 1.230 + * @param aChannel out param that will contain the result 1.231 + * @return NS_OK if successful 1.232 + */ 1.233 + NS_IMETHOD GetChannel(nsIChannel** aChannel); 1.234 + 1.235 + /** 1.236 + * Get the DTD associated with this parser 1.237 + * @update vidur 9/29/99 1.238 + * @param aDTD out param that will contain the result 1.239 + * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error 1.240 + */ 1.241 + NS_IMETHOD GetDTD(nsIDTD** aDTD); 1.242 + 1.243 + /** 1.244 + * Get the nsIStreamListener for this parser 1.245 + */ 1.246 + virtual nsIStreamListener* GetStreamListener(); 1.247 + 1.248 + void SetSinkCharset(nsACString& aCharset); 1.249 + 1.250 + /** 1.251 + * Removes continue parsing events 1.252 + * @update kmcclusk 5/18/98 1.253 + */ 1.254 + /* NOTE(review): declared with NS_IMETHODIMP, unlike the NS_IMETHOD used by the other declarations in this class -- confirm this is intentional. */ 1.255 + NS_IMETHODIMP CancelParsingEvents(); 1.256 + 1.257 + /** 1.258 + * Return true. 1.259 + */ 1.260 + virtual bool IsInsertionPointDefined(); 1.261 + 1.262 + /** 1.263 + * No-op. 1.264 + */ 1.265 + virtual void BeginEvaluatingParserInsertedScript(); 1.266 + 1.267 + /** 1.268 + * No-op. 1.269 + */ 1.270 + virtual void EndEvaluatingParserInsertedScript(); 1.271 + 1.272 + /** 1.273 + * No-op. 1.274 + */ 1.275 + virtual void MarkAsNotScriptCreated(const char* aCommand); 1.276 + 1.277 + /** 1.278 + * Always false. 
1.279 + */ 1.280 + virtual bool IsScriptCreated(); 1.281 + 1.282 + /** 1.283 + * Set the parser state to indicate whether parsing tokens can be interrupted 1.284 + * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted. 1.285 + * @update kmcclusk 5/18/98 1.286 + */ 1.287 + void SetCanInterrupt(bool aCanInterrupt); 1.288 + 1.289 + /** 1.290 + * This is called when the final chunk has been 1.291 + * passed to the parser and the content sink has 1.292 + * interrupted token processing. It schedules 1.293 + * a ParserContinue PL_Event which will ask the parser 1.294 + * to HandleParserContinueEvent when it is handled. 1.295 + * @update kmcclusk6/1/2001 1.296 + */ 1.297 + nsresult PostContinueEvent(); 1.298 + 1.299 + /** 1.300 + * Fired when the continue parse event is triggered. 1.301 + * @update kmcclusk 5/18/98 1.302 + */ 1.303 + void HandleParserContinueEvent(class nsParserContinueEvent *); 1.304 + /* Resets the parser by calling Cleanup() and then Initialize() with its default argument (aConstructor = false). */ 1.305 + virtual void Reset() { 1.306 + Cleanup(); 1.307 + Initialize(); 1.308 + } 1.309 + /* True only when mSink is set and that sink reports that a script is executing. */ 1.310 + bool IsScriptExecuting() { 1.311 + return mSink && mSink->IsScriptExecuting(); 1.312 + } 1.313 + /* True when no script is executing and mProcessingNetworkData is false. */ 1.314 + bool IsOkToProcessNetworkData() { 1.315 + return !IsScriptExecuting() && !mProcessingNetworkData; 1.316 + } 1.317 + 1.318 + protected: 1.319 + 1.320 + void Initialize(bool aConstructor = false); 1.321 + void Cleanup(); 1.322 + 1.323 + /** 1.324 + * 1.325 + * @update gess5/18/98 1.326 + * @param aFilename (NOTE(review): undocumented; semantics not visible in this header) 1.327 + * @return nsresult status 1.328 + */ 1.329 + nsresult WillBuildModel(nsString& aFilename); 1.330 + 1.331 + /** 1.332 + * 1.333 + * @update gess5/18/98 1.334 + * @param anErrorCode (NOTE(review): undocumented; semantics not visible in this header) 1.335 + * @return nsresult status 1.336 + */ 1.337 + nsresult DidBuildModel(nsresult anErrorCode); 1.338 + 1.339 +private: 1.340 + 1.341 + /******************************************* 1.342 + These are the tokenization methods... 
1.343 + *******************************************/ 1.344 + 1.345 + /** 1.346 + * Part of the code sandwich, this gets called right before 1.347 + * the tokenization process begins. The main reason for 1.348 + * this call is to allow the delegate to do initialization. 1.349 + * 1.350 + * @update gess 3/25/98 1.351 + * @param aIsFinalChunk (NOTE(review): presumably true for the last chunk of input -- confirm in nsParser.cpp) 1.352 + * @return TRUE if it's ok to proceed 1.353 + */ 1.354 + bool WillTokenize(bool aIsFinalChunk = false); 1.355 + 1.356 + 1.357 + /** 1.358 + * This is the primary control routine. It iteratively 1.359 + * consumes tokens until an error occurs or you run out 1.360 + * of data. 1.361 + * 1.362 + * @update gess 3/25/98 1.363 + * @return error code 1.364 + */ 1.365 + nsresult Tokenize(bool aIsFinalChunk = false); 1.366 + 1.367 + /** 1.368 + * Pushes XML fragment parsing data to expat without an input stream. 1.369 + */ 1.370 + nsresult Parse(const nsAString& aSourceBuffer, 1.371 + void* aKey, 1.372 + bool aLastCall); 1.373 + 1.374 +protected: 1.375 + //********************************************* 1.376 + // And now, some data members... 1.377 + //********************************************* 1.378 + 1.379 + 1.380 + CParserContext* mParserContext; 1.381 + nsCOMPtr<nsIDTD> mDTD; 1.382 + nsCOMPtr<nsIRequestObserver> mObserver; 1.383 + nsCOMPtr<nsIContentSink> mSink; 1.384 + nsIRunnable* mContinueEvent; // weak ref 1.385 + 1.386 + eParserCommands mCommand; 1.387 + nsresult mInternalState; 1.388 + nsresult mStreamStatus; 1.389 + int32_t mCharsetSource; 1.390 + 1.391 + uint16_t mFlags; 1.392 + 1.393 + nsString mUnusedInput; 1.394 + nsCString mCharset; 1.395 + nsCString mCommandStr; 1.396 + 1.397 + bool mProcessingNetworkData; 1.398 + bool mIsAboutBlank; 1.399 +}; 1.400 + 1.401 +#endif 1.402 +