parser/htmlparser/src/nsParser.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /**
michael@0 7 * MODULE NOTES:
michael@0 8 *
michael@0 9 * This class does two primary jobs:
michael@0 10 * 1) It iterates the tokens provided during the
michael@0 11 * tokenization process, identifying where elements
michael@0 12 * begin and end (doing validation and normalization).
michael@0 13 * 2) It controls and coordinates with an instance of
michael@0 14 * the IContentSink interface, to coordinate the
michael@0 15 * production of the content model.
michael@0 16 *
michael@0 17 * The basic operation of this class assumes that an HTML
michael@0 18 * document is non-normalized. Therefore, we don't process
michael@0 19 * the document in a normalized way. Don't bother to look
michael@0 20 * for methods like: doHead() or doBody().
michael@0 21 *
michael@0 22 * Instead, in order to be backward compatible, we must
michael@0 23 * scan the set of tokens and perform this basic set of
michael@0 24 * operations:
michael@0 25 * 1) Determine the token type (easy, since the tokens know)
michael@0 26 * 2) Determine the appropriate section of the HTML document
michael@0 27 * each token belongs in (HTML,HEAD,BODY,FRAMESET).
michael@0 28 * 3) Insert content into our document (via the sink) into
michael@0 29 * the correct section.
michael@0 30 * 4) In the case of tags that belong in the BODY, we must
michael@0 31 * ensure that our underlying document state reflects
michael@0 32 * the appropriate context for our tag.
michael@0 33 *
michael@0 34 * For example, if we see a <TR>, we must ensure our
michael@0 35 * document contains a table into which the row can
michael@0 36 * be placed. This may result in "implicit containers"
michael@0 37 * created to ensure a well-formed document.
michael@0 38 *
michael@0 39 */
michael@0 40
michael@0 41 #ifndef NS_PARSER__
michael@0 42 #define NS_PARSER__
michael@0 43
michael@0 44 #include "nsIParser.h"
michael@0 45 #include "nsDeque.h"
michael@0 46 #include "nsIURL.h"
michael@0 47 #include "CParserContext.h"
michael@0 48 #include "nsParserCIID.h"
michael@0 49 #include "nsITokenizer.h"
michael@0 50 #include "nsHTMLTags.h"
michael@0 51 #include "nsIContentSink.h"
michael@0 52 #include "nsCOMArray.h"
michael@0 53 #include "nsCycleCollectionParticipant.h"
michael@0 54 #include "nsWeakReference.h"
michael@0 55
michael@0 56 class nsIDTD;
michael@0 57 class nsScanner;
michael@0 58 class nsIRunnable;
michael@0 59
michael@0 60 #ifdef _MSC_VER
michael@0 61 #pragma warning( disable : 4275 )
michael@0 62 #endif
michael@0 63
michael@0 64
michael@0 65 class nsParser : public nsIParser,
michael@0 66 public nsIStreamListener,
michael@0 67 public nsSupportsWeakReference
michael@0 68 { // nsParser derives from nsIParser, nsIStreamListener and nsSupportsWeakReference; the declarations below cover the parser API, the stream callbacks and the parser's state.
michael@0 69 public:
michael@0 70 /**
michael@0 71 * Called on module init
michael@0 72 */
michael@0 73 static nsresult Init();
michael@0 74
michael@0 75 /**
michael@0 76 * Called on module shutdown
michael@0 77 */
michael@0 78 static void Shutdown();
michael@0 79
michael@0 80 NS_DECL_CYCLE_COLLECTING_ISUPPORTS
michael@0 81 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
michael@0 82
michael@0 83 /**
michael@0 84 * Default constructor
michael@0 85 * @update gess 5/11/98
michael@0 86 */
michael@0 87 nsParser();
michael@0 88
michael@0 89 /**
michael@0 90 * Destructor
michael@0 91 * @update gess 5/11/98
michael@0 92 */
michael@0 93 virtual ~nsParser();
michael@0 94
michael@0 95 /**
michael@0 96 * Select given content sink into parser for parser output
michael@0 97 * @update gess 5/11/98
michael@0 98 * @param aSink is the new sink to be used by parser
michael@0 99 * @return nothing (declared void); the previous sink is not handed back
michael@0 100 */
michael@0 101 NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
michael@0 102
michael@0 103 /**
michael@0 104 * Retrieve the sink set into the parser
michael@0 105 * @update gess 5/11/98
michael@0 106 * (takes no parameters)
michael@0 107 * @return the sink set into the parser (presumably nullptr when none is set -- TODO confirm)
michael@0 108 */
michael@0 109 NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
michael@0 110
michael@0 111 /**
michael@0 112 * Call SetCommand once you've created a parser, and want to instruct it
michael@0 113 * about the command which caused the parser to be constructed. For example,
michael@0 114 * this allows us to select a DTD which can do, say, view-source.
michael@0 115 * GetCommand takes an out-parameter string (presumably filled with the command -- TODO confirm).
michael@0 116 * @update gess 3/25/98
michael@0 117 * @param aCommand -- string (or, for the enum overload, aParserCommand) naming the command
michael@0 118 * @return nada
michael@0 119 */
michael@0 120 NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
michael@0 121 NS_IMETHOD_(void) SetCommand(const char* aCommand);
michael@0 122 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
michael@0 123
michael@0 124 /**
michael@0 125 * Call this method once you've created a parser, and want to instruct it
michael@0 126 * about what charset to load
michael@0 127 *
michael@0 128 * @update ftang 4/23/99
michael@0 129 * @param aCharset -- the charset of a document
michael@0 130 * @param aSource -- the source of the charset
michael@0 131 * @return nada
michael@0 132 */
michael@0 133 NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
michael@0 134
michael@0 135 NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource)
michael@0 136 { // Copies mCharset and mCharsetSource into the two out-parameters.
michael@0 137 aCharset = mCharset;
michael@0 138 aSource = mCharsetSource;
michael@0 139 }
michael@0 140
michael@0 141 /**
michael@0 142 * Cause parser to parse input from given URL
michael@0 143 * @update gess 5/11/98
michael@0 144 * @param aURL is a descriptor for source document
michael@0 145 * @param aListener is a listener to forward notifications to (defaults to nullptr; aKey defaults to 0, aMode to eDTDMode_autodetect)
michael@0 146 * @return nsresult status code (the method is declared with NS_IMETHOD)
michael@0 147 */
michael@0 148 NS_IMETHOD Parse(nsIURI* aURL,
michael@0 149 nsIRequestObserver* aListener = nullptr,
michael@0 150 void* aKey = 0,
michael@0 151 nsDTDMode aMode = eDTDMode_autodetect);
michael@0 152
michael@0 153 /**
michael@0 154 * Presumably parses a fragment from aSourceBuffer using aTagStack (inferred from the names; undocumented -- TODO confirm)
michael@0 155 */
michael@0 156 NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
michael@0 157 nsTArray<nsString>& aTagStack);
michael@0 158
michael@0 159 /**
michael@0 160 * This method gets called when the tokens have been consumed, and it's time
michael@0 161 * to build the model via the content sink.
michael@0 162 * @update gess 5/11/98
michael@0 163 * @return nsresult status code describing whether model building went well
michael@0 164 */
michael@0 165 NS_IMETHOD BuildModel(void);
michael@0 166
michael@0 167 NS_IMETHOD ContinueInterruptedParsing();
michael@0 168 NS_IMETHOD_(void) BlockParser();
michael@0 169 NS_IMETHOD_(void) UnblockParser();
michael@0 170 NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
michael@0 171 NS_IMETHOD Terminate(void);
michael@0 172
michael@0 173 /**
michael@0 174 * Call this to query whether the parser is enabled or not.
michael@0 175 *
michael@0 176 * @update vidur 4/12/99
michael@0 177 * @return current state
michael@0 178 */
michael@0 179 NS_IMETHOD_(bool) IsParserEnabled();
michael@0 180
michael@0 181 /**
michael@0 182 * Call this to query whether the parser thinks it's done with parsing.
michael@0 183 *
michael@0 184 * @update rickg 5/12/01
michael@0 185 * @return complete state
michael@0 186 */
michael@0 187 NS_IMETHOD_(bool) IsComplete();
michael@0 188
michael@0 189 /**
michael@0 190 * This rather arcane method (hack) is used as a signal between the
michael@0 191 * DTD and the parser. It allows the DTD to tell the parser that content
michael@0 192 * that comes through (parser::parser(string)) but not consumed should
michael@0 193 * propagate into the next string based parse call.
michael@0 194 *
michael@0 195 * @update gess 9/1/98
michael@0 196 * @param aBuffer presumably the unconsumed string content to carry over (cf. mUnusedInput) -- TODO confirm
michael@0 197 * @return nothing (declared void)
michael@0 198 */
michael@0 199 void SetUnusedInput(nsString& aBuffer);
michael@0 200
michael@0 201 /**
michael@0 202 * This method gets called (automatically) during incremental parsing
michael@0 203 * @update gess 5/11/98
michael@0 204 * @return nsresult status code; all three flags have defaults (see the declaration)
michael@0 205 */
michael@0 206 virtual nsresult ResumeParse(bool allowIteration = true,
michael@0 207 bool aIsFinalChunk = false,
michael@0 208 bool aCanInterrupt = true);
michael@0 209
michael@0 210 //*********************************************
michael@0 211 // These methods are callback methods used by
michael@0 212 // net lib to let us know about our inputstream.
michael@0 213 //*********************************************
michael@0 214 // nsIRequestObserver methods:
michael@0 215 NS_DECL_NSIREQUESTOBSERVER
michael@0 216
michael@0 217 // nsIStreamListener methods:
michael@0 218 NS_DECL_NSISTREAMLISTENER
michael@0 219
michael@0 220 void PushContext(CParserContext& aContext);
michael@0 221 CParserContext* PopContext();
michael@0 222 CParserContext* PeekContext() {return mParserContext;} // returns mParserContext without modifying it
michael@0 223
michael@0 224 /**
michael@0 225 * Get the channel associated with this parser
michael@0 226 * @update harishd,gagan 07/17/01
michael@0 227 * @param aChannel out param that will contain the result
michael@0 228 * @return NS_OK if successful
michael@0 229 */
michael@0 230 NS_IMETHOD GetChannel(nsIChannel** aChannel);
michael@0 231
michael@0 232 /**
michael@0 233 * Get the DTD associated with this parser
michael@0 234 * @update vidur 9/29/99
michael@0 235 * @param aDTD out param that will contain the result
michael@0 236 * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
michael@0 237 */
michael@0 238 NS_IMETHOD GetDTD(nsIDTD** aDTD);
michael@0 239
michael@0 240 /**
michael@0 241 * Get the nsIStreamListener for this parser
michael@0 242 */
michael@0 243 virtual nsIStreamListener* GetStreamListener();
michael@0 244
michael@0 245 void SetSinkCharset(nsACString& aCharset);
michael@0 246
michael@0 247 /**
michael@0 248 * Removes continue parsing events
michael@0 249 * @update kmcclusk 5/18/98
michael@0 250 */
michael@0 251
michael@0 252 NS_IMETHODIMP CancelParsingEvents();
michael@0 253
michael@0 254 /**
michael@0 255 * Return true.
michael@0 256 */
michael@0 257 virtual bool IsInsertionPointDefined();
michael@0 258
michael@0 259 /**
michael@0 260 * No-op.
michael@0 261 */
michael@0 262 virtual void BeginEvaluatingParserInsertedScript();
michael@0 263
michael@0 264 /**
michael@0 265 * No-op.
michael@0 266 */
michael@0 267 virtual void EndEvaluatingParserInsertedScript();
michael@0 268
michael@0 269 /**
michael@0 270 * No-op.
michael@0 271 */
michael@0 272 virtual void MarkAsNotScriptCreated(const char* aCommand);
michael@0 273
michael@0 274 /**
michael@0 275 * Always false.
michael@0 276 */
michael@0 277 virtual bool IsScriptCreated();
michael@0 278
michael@0 279 /**
michael@0 280 * Set the parser state that indicates whether token processing can be interrupted
michael@0 281 * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
michael@0 282 * @update kmcclusk 5/18/98
michael@0 283 */
michael@0 284 void SetCanInterrupt(bool aCanInterrupt);
michael@0 285
michael@0 286 /**
michael@0 287 * This is called when the final chunk has been
michael@0 288 * passed to the parser and the content sink has
michael@0 289 * interrupted token processing. It schedules
michael@0 290 * a ParserContinue PL_Event which will ask the parser
michael@0 291 * to HandleParserContinueEvent when it is handled.
michael@0 292 * @update kmcclusk 6/1/2001
michael@0 293 */
michael@0 294 nsresult PostContinueEvent();
michael@0 295
michael@0 296 /**
michael@0 297 * Fired when the continue parse event is triggered.
michael@0 298 * @update kmcclusk 5/18/98
michael@0 299 */
michael@0 300 void HandleParserContinueEvent(class nsParserContinueEvent *);
michael@0 301
michael@0 302 virtual void Reset() { // Calls Cleanup(), then Initialize() with its default argument (aConstructor = false).
michael@0 303 Cleanup();
michael@0 304 Initialize();
michael@0 305 }
michael@0 306
michael@0 307 bool IsScriptExecuting() { // true only when a sink is set and that sink reports a script executing
michael@0 308 return mSink && mSink->IsScriptExecuting();
michael@0 309 }
michael@0 310
michael@0 311 bool IsOkToProcessNetworkData() { // true only when no script is executing and mProcessingNetworkData is false
michael@0 312 return !IsScriptExecuting() && !mProcessingNetworkData;
michael@0 313 }
michael@0 314
michael@0 315 protected:
michael@0 316
michael@0 317 void Initialize(bool aConstructor = false);
michael@0 318 void Cleanup();
michael@0 319
michael@0 320 /**
michael@0 321 * Presumably runs before model building starts (inferred from the name -- TODO confirm)
michael@0 322 * @update gess 5/18/98
michael@0 323 * @param aFilename string reference; its role is not documented here
michael@0 324 * @return nsresult status code
michael@0 325 */
michael@0 326 nsresult WillBuildModel(nsString& aFilename);
michael@0 327
michael@0 328 /**
michael@0 329 * Presumably runs after model building finishes (inferred from the name -- TODO confirm)
michael@0 330 * @update gess 5/18/98
michael@0 331 * @param anErrorCode an nsresult passed in by the caller; its use is not documented here
michael@0 332 * @return nsresult status code
michael@0 333 */
michael@0 334 nsresult DidBuildModel(nsresult anErrorCode);
michael@0 335
michael@0 336 private:
michael@0 337
michael@0 338 /*******************************************
michael@0 339 These are the tokenization methods...
michael@0 340 *******************************************/
michael@0 341
michael@0 342 /**
michael@0 343 * Part of the code sandwich, this gets called right before
michael@0 344 * the tokenization process begins. The main reason for
michael@0 345 * this call is to allow the delegate to do initialization.
michael@0 346 *
michael@0 347 * @update gess 3/25/98
michael@0 348 * @param aIsFinalChunk defaults to false
michael@0 349 * @return true if it's ok to proceed
michael@0 350 */
michael@0 351 bool WillTokenize(bool aIsFinalChunk = false);
michael@0 352
michael@0 353
michael@0 354 /**
michael@0 355 * This is the primary control routine. It iteratively
michael@0 356 * consumes tokens until an error occurs or you run out
michael@0 357 * of data.
michael@0 358 *
michael@0 359 * @update gess 3/25/98
michael@0 360 * @return error code
michael@0 361 */
michael@0 362 nsresult Tokenize(bool aIsFinalChunk = false);
michael@0 363
michael@0 364 /**
michael@0 365 * Pushes XML fragment parsing data to expat without an input stream.
michael@0 366 */
michael@0 367 nsresult Parse(const nsAString& aSourceBuffer,
michael@0 368 void* aKey,
michael@0 369 bool aLastCall);
michael@0 370
michael@0 371 protected:
michael@0 372 //*********************************************
michael@0 373 // And now, some data members...
michael@0 374 //*********************************************
michael@0 375
michael@0 376
michael@0 377 CParserContext* mParserContext; // returned by PeekContext()
michael@0 378 nsCOMPtr<nsIDTD> mDTD;
michael@0 379 nsCOMPtr<nsIRequestObserver> mObserver;
michael@0 380 nsCOMPtr<nsIContentSink> mSink; // consulted by IsScriptExecuting()
michael@0 381 nsIRunnable* mContinueEvent; // weak ref
michael@0 382
michael@0 383 eParserCommands mCommand;
michael@0 384 nsresult mInternalState;
michael@0 385 nsresult mStreamStatus;
michael@0 386 int32_t mCharsetSource; // reported by GetDocumentCharset()
michael@0 387
michael@0 388 uint16_t mFlags;
michael@0 389
michael@0 390 nsString mUnusedInput;
michael@0 391 nsCString mCharset; // reported by GetDocumentCharset()
michael@0 392 nsCString mCommandStr;
michael@0 393
michael@0 394 bool mProcessingNetworkData; // checked by IsOkToProcessNetworkData()
michael@0 395 bool mIsAboutBlank;
michael@0 396 };
michael@0 397
michael@0 398 #endif
michael@0 399

mercurial